@article {pmid38496489, year = {2024}, author = {Rinker, DC and Sauters, TJC and Steffen, K and Gumilang, A and Raja, HA and Rangel-Grimaldo, M and Pinzan, CF and de Castro, PA and Dos Reis, TF and Delbaje, E and Houbraken, J and Goldman, GH and Oberlies, NH and Rokas, A}, title = {Strain heterogeneity in a non-pathogenic fungus highlights factors contributing to virulence.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.03.08.583994}, pmid = {38496489}, abstract = {Fungal pathogens exhibit extensive strain heterogeneity, including variation in virulence. Whether closely related non-pathogenic species also exhibit strain heterogeneity remains unknown. Here, we comprehensively characterized the pathogenic potentials (i.e., the ability to cause morbidity and mortality) of 16 diverse strains of Aspergillus fischeri, a non-pathogenic close relative of the major pathogen Aspergillus fumigatus. In vitro immune response assays and in vivo virulence assays using a mouse model of pulmonary aspergillosis showed that A. fischeri strains varied widely in their pathogenic potential. Furthermore, pangenome analyses suggest that A. fischeri genomic and phenotypic diversity is even greater. Genomic, transcriptomic, and metabolomic profiling identified several pathways and secondary metabolites associated with variation in virulence. Notably, strain virulence was associated with the simultaneous presence of the secondary metabolites hexadehydroastechrome and gliotoxin. 
We submit that examining the pathogenic potentials of non-pathogenic close relatives is key for understanding the origins of fungal pathogenicity.}, } @article {pmid38495945, year = {2024}, author = {Lecomte, L and Árnyasi, M and Ferchaud, AL and Kent, M and Lien, S and Stenløkk, K and Sylvestre, F and Bernatchez, L and Mérot, C}, title = {Investigating structural variant, indel and single nucleotide polymorphism differentiation between locally adapted Atlantic salmon populations.}, journal = {Evolutionary applications}, volume = {17}, number = {3}, pages = {e13653}, pmid = {38495945}, issn = {1752-4571}, abstract = {Genomic structural variants (SVs) are now recognized as an integral component of intraspecific polymorphism and are known to contribute to evolutionary processes in various organisms. However, they are inherently difficult to detect and genotype from readily available short-read sequencing data, and therefore remain poorly documented in wild populations. Salmonid species displaying strong interpopulation variability in both life history traits and habitat characteristics, such as Atlantic salmon (Salmo salar), offer a prime context for studying adaptive polymorphism, but the contribution of SVs to fine-scale local adaptation has yet to be explored. Here, we performed a comparative analysis of SVs, single nucleotide polymorphisms (SNPs) and small indels (<50 bp) segregating in the Romaine and Puyjalon salmon, two putatively locally adapted populations inhabiting neighboring rivers (Québec, Canada) and showing pronounced variation in life history traits, namely growth, fecundity, and age at maturity and smoltification. We first catalogued polymorphism using a hybrid SV characterization approach pairing both short- (16X) and long-read sequencing (20X) for variant discovery with graph-based genotyping of SVs across 60 salmon genomes, along with characterization of SNPs and small indels from short reads. 
We thus identified 115,907 SVs, 8,777,832 SNPs and 1,089,321 short indels, with SVs covering 4.8 times more base pairs than SNPs. All three variant types revealed a highly congruent population structure and similar patterns of $F_{ST}$ and density variation along the genome. Finally, we performed outlier detection and redundancy analysis (RDA) to identify variants of interest in the putative local adaptation of Romaine and Puyjalon salmon. Genes located near these variants were enriched for biological processes related to nervous system function, suggesting that observed variation in traits such as age at smoltification could arise from differences in neural development. This study therefore demonstrates the feasibility of large-scale SV characterization and highlights its relevance for salmonid population genomics.}, } @article {pmid38492232, year = {2024}, author = {Poretsky, E and Cagirici, HB and Andorf, CM and Sen, TZ}, title = {Harnessing the predicted maize pan-interactome for putative gene function prediction and prioritization of candidate genes for important traits.}, journal = {G3 (Bethesda, Md.)}, volume = {}, number = {}, pages = {}, doi = {10.1093/g3journal/jkae059}, pmid = {38492232}, issn = {2160-1836}, abstract = {The recent assembly and annotation of the 26 maize nested association mapping (NAM) population founder inbreds have enabled large-scale pan-genomic comparative studies. These studies have expanded our understanding of agronomically important traits by integrating pan-transcriptomic data with trait-specific gene candidates from previous association mapping results. In contrast to the availability of pan-transcriptomic data, obtaining reliable protein-protein interaction (PPI) data has remained a challenge due to its high cost and complexity. We generated predicted PPI networks for each of the 26 genomes using the established STRING database. The individual genome-interactomes were then integrated to generate core- and pan-interactomes. 
We deployed the PPI clustering algorithm ClusterONE to identify numerous PPI clusters that were functionally annotated using gene ontology (GO) functional enrichment, demonstrating a diverse range of enriched GO terms across different clusters. Additional cluster annotations were generated by integrating gene co-expression data and gene description annotations, providing additional useful information. We show that the functionally annotated PPI clusters establish a useful framework for protein function prediction and prioritization of candidate genes of interest. Our study not only provides a comprehensive resource of predicted PPI networks for 26 maize genomes, but also offers annotated interactome clusters for predicting protein functions and prioritizing gene candidates. The source code for the Python implementation of the analysis workflow and a standalone web application for accessing the analysis results are available at https://github.com/eporetsky/PanPPI.}, } @article {pmid38491145, year = {2024}, author = {Wang, Y and Tang, H and Wang, X and Sun, Y and Joseph, PV and Paterson, AH}, title = {Detection of colinear blocks and synteny and evolutionary analyses based on utilization of MCScanX.}, journal = {Nature protocols}, volume = {}, number = {}, pages = {}, pmid = {38491145}, issn = {1750-2799}, abstract = {As different taxa evolve, gene order often changes slowly enough that chromosomal 'blocks' with conserved gene orders (synteny) are discernible. The MCScanX toolkit (https://github.com/wyp1125/MCScanX) was published in 2012 as freely available software for the detection of such 'colinear blocks' and subsequent synteny and evolutionary analyses based on genome-wide gene location and protein sequence information. Owing to its simplicity and high efficiency for colinear block detection, MCScanX provides a powerful tool for conducting diverse synteny and evolutionary analyses. 
Moreover, the detection of colinear blocks has been embraced as an integral step for pangenome graph construction. Here, new application trends of MCScanX are explored, striving to better connect this increasingly used tool to other tools and accelerate insight generation from exponentially growing sequence data. We provide a detailed protocol that covers how to install MCScanX on diverse platforms, tune parameters, prepare input files from data from the National Center for Biotechnology Information, run MCScanX and its visualization and evolutionary analysis tools, and connect MCScanX with external tools, including MCScanX-transposed, Circos and SynVisio. This protocol is easily implemented by users with minimal computational background and is adaptable to new data of interest to them. The data and utility programs for this protocol can be obtained from http://bdx-consulting.com/mcscanx-protocol.}, } @article {pmid38488860, year = {2024}, author = {Freddi, S and Rajabal, V and Tetu, SG and Gillings, MR and Penesyan, A}, title = {Microbial biofilms on macroalgae harbour diverse integron gene cassettes.}, journal = {Microbiology (Reading, England)}, volume = {170}, number = {3}, pages = {}, doi = {10.1099/mic.0.001446}, pmid = {38488860}, issn = {1465-2080}, abstract = {Integrons are genetic platforms that capture, rearrange and express mobile modules called gene cassettes. The best characterized gene cassettes encode antibiotic resistance, but the function of most integron gene cassettes remains unknown. Functional predictions suggest that many gene cassettes could encode proteins that facilitate interactions with other cells and with the extracellular environment. Because cell interactions are essential for biofilm stability, we sequenced gene cassettes from biofilms growing on the surface of the marine macroalgae Ulva australis and Sargassum linearifolium. 
Algal samples were obtained from coastal rock platforms around Sydney, Australia, using seawater as a control. We demonstrated that integrons in microbial biofilms did not sample genes randomly from the surrounding seawater, but harboured specific functions that potentially provided an adaptive advantage to both the bacterial cells in biofilm communities and their macroalgal host. Further, integron gene cassettes had a well-defined spatial distribution, suggesting that each bacterial biofilm acquired these genetic elements via sampling from a large but localized pool of gene cassettes. These findings suggest two forms of filtering: a selective acquisition of different integron-containing bacterial species into the distinct biofilms on Ulva and Sargassum surfaces, and a selective retention of unique populations of gene cassettes at each sampling location.}, } @article {pmid38488392, year = {2024}, author = {Wang, M and Li, X and Liu, X and Hou, X and He, Y and Yu, J-H and Hu, S and Yin, H and Xie, B-B}, title = {Annotation of 2,507 Saccharomyces cerevisiae genomes.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0358223}, doi = {10.1128/spectrum.03582-23}, pmid = {38488392}, issn = {2165-0497}, abstract = {Saccharomyces cerevisiae (baker's yeast, budding yeast) is one of the most important model organisms for biological research and is a crucial microorganism in industry. Currently, a huge number of Saccharomyces cerevisiae genome sequences are available at the public domain. However, these genomes are distributed at different websites and a large number of them are released without annotation information. To provide one complete annotated genome data resource, we collected 2,507 Saccharomyces cerevisiae genome assemblies and re-annotated 2,506 assemblies using a custom annotation pipeline, producing a total of 15,407,164 protein-coding gene models. With a custom pipeline, all these gene sequences were clustered into families. 
A total of 1,506 single-copy genes were selected as marker genes, which were then used to evaluate the genome completeness and base qualities of all assemblies. Pangenomic analyses were performed based on a selected subset of 847 medium-high-quality genomes. Statistical comparisons revealed a number of gene families showing copy number variations among different organism sources. To the authors' knowledge, this study represents the largest genome annotation project of S. cerevisiae so far, providing rich genomic resources for the future studies of the model organism S. cerevisiae and its relatives. IMPORTANCE: Saccharomyces cerevisiae (baker's yeast, budding yeast) is one of the most important model organisms for biological research and is a crucial microorganism in industry. Though a huge number of Saccharomyces cerevisiae genome sequences are available at the public domain, these genomes are distributed at different websites and most are released without annotation, hindering the efficient reuse of these genome resources. Here, we collected 2,507 genomes for Saccharomyces cerevisiae, performed genome annotation, and evaluated the genome qualities. All the obtained data have been deposited at public repositories and are freely accessible to the community. This study represents the largest genome annotation project of S. cerevisiae so far, providing one complete annotated genome data set for S. 
cerevisiae, an important workhorse for fundamental biology, biotechnology, and industry.}, } @article {pmid38488280, year = {2024}, author = {Giacomini, JJ and Torres-Morales, J and Tang, J and Dewhirst, FE and Borisy, GG and Mark Welch, JL}, title = {Spatial ecology of Haemophilus and Aggregatibacter in the human oral cavity.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0401723}, doi = {10.1128/spectrum.04017-23}, pmid = {38488280}, issn = {2165-0497}, abstract = {UNLABELLED: Haemophilus and Aggregatibacter are two of the most common bacterial genera in the human oral cavity, encompassing both commensals and pathogens of substantial ecological and medical significance. In this study, we conducted a metapangenomic analysis of oral Haemophilus and Aggregatibacter species to uncover genomic diversity, phylogenetic relationships, and habitat specialization within the human oral cavity. Using three metrics---pangenomic gene content, phylogenomics, and average nucleotide identity (ANI)---we first identified distinct species and sub-species groups among these genera. Mapping of metagenomic reads then revealed clear patterns of habitat specialization, such as Aggregatibacter species predominantly in dental plaque, a distinctive Haemophilus parainfluenzae sub-species group on the tongue dorsum, and H. sp. HMT-036 predominantly in keratinized gingiva and buccal mucosa. In addition, we found that supragingival plaque samples contained predominantly only one out of the three taxa, H. parainfluenzae, Aggregatibacter aphrophilus, and A. sp. HMT-458, suggesting independent niches or a competitive relationship. Functional analyses revealed the presence of key metabolic genes, such as oxaloacetate decarboxylase, correlated with habitat specialization, suggesting metabolic versatility as a driving force. Additionally, heme synthesis distinguishes H. sp. 
HMT-036 from closely related Haemophilus haemolyticus, suggesting that the availability of micronutrients, particularly iron, was important in the evolutionary ecology of these species. Overall, our study exemplifies the power of metapangenomics to identify factors that may affect ecological interactions within microbial communities, including genomic diversity, habitat specialization, and metabolic versatility.

IMPORTANCE: Understanding the microbial ecology of the mouth is essential for comprehending human physiology. This study employs metapangenomics to reveal that various Haemophilus and Aggregatibacter species exhibit distinct ecological preferences within the oral cavity of healthy individuals, thereby supporting the site-specialist hypothesis. Additionally, it was observed that the gene pool of different Haemophilus species correlates with their ecological niches. These findings shed light on the significance of key metabolic functions in shaping microbial distribution patterns and interspecies interactions in the oral ecosystem.}, } @article {pmid38487210, year = {2023}, author = {Grizon, A and Theil, S and Callon, C and Gerber, P and Helinck, S and Dugat-Bony, E and Bonnarme, P and Chassard, C}, title = {Genetic and technological diversity of Streptococcus thermophilus isolated from the Saint-Nectaire PDO cheese-producing area.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1245510}, doi = {10.3389/fmicb.2023.1245510}, pmid = {38487210}, issn = {1664-302X}, abstract = {Streptococcus thermophilus is of major importance for cheese manufacturing to ensure rapid acidification; however, studies indicate that intensive use of commercial strains leads to the loss of typical characteristics of the products. To strengthen the link between the product and its geographical area and improve the sensory qualities of cheeses, cheese-producing protected designations of origin (PDO) are increasingly interested in the development of specific autochthonous starter cultures. The present study is therefore investigating the genetic and functional diversity of S. thermophilus strains isolated from a local cheese-producing PDO area. Putative S. thermophilus isolates were isolated and identified from milk collected in the Saint-Nectaire cheese-producing PDO area and from commercial starters. 
Whole genomes of isolates were sequenced, and a comparative analysis based on their pan-genome was carried out. Important functional properties were studied, including acidifying and proteolytic activities. Twenty-two isolates representative of the diversity of the geographical area and four commercial strains were selected for comparison. The resulting phylogenetic trees do not correspond to the geographical distribution of isolates. The clustering based on the pan-genome analysis indicates that isolates are divided into five distinct groups. A Kyoto Encyclopedia of Genes and Genomes (KEGG) functional annotation of the accessory genes indicates that the accessory gene contents of isolates are involved in different functional categories. High variability in acidifying activities and less diversity in proteolytic activities were also observed. These results indicate that high genetic and functional variabilities of the species S. thermophilus may arise from a small (1,800 km$^{2}$) geographical area and may be exploited to meet demand for use as autochthonous starters.}, } @article {pmid38486452, year = {2024}, author = {Shi, T and Zhang, X and Hou, Y and Jia, C and Dan, X and Zhang, Y and Jiang, Y and Lai, Q and Feng, J and Feng, J and Ma, T and Wu, J and Liu, S and Zhang, L and Long, Z and Chen, L and Street, NR and Ingvarsson, PK and Liu, J and Yin, T and Wang, J}, title = {The super-pangenome of Populus unveil genomic facets for its adaptation and diversification in widespread forest trees.}, journal = {Molecular plant}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.molp.2024.03.009}, pmid = {38486452}, issn = {1752-9867}, abstract = {Understanding the underlying mechanisms and links between genome evolution and adaptive innovations stands as a key goal in evolutionary studies. Poplars, among the world's most widely distributed and cultivated trees, exhibit extensive phenotypic diversity and environmental adaptability. 
In this study, we present a genus-level super-pangenome comprising 19 Populus genomes, revealing the likely pivotal role of private genes in facilitating local environmental and climate adaptation. Through the integration of pan-genomes with transcriptomes, methylomes and chromatin accessibility mapping, we unveil that the evolutionary trajectories of pan-genes and duplicated genes are closely linked to local genomic landscapes of regulatory and epigenetic architectures, notably CG methylation in gene-body regions. Further comparative genomic analyses have enabled the identification of 142,202 structural variants (SVs) across species, which intersect with a significant number of genes and contribute substantially to both phenotypic and adaptive divergence. We have experimentally validated a ∼180 bp presence/absence variant impacting the expression of the CUC2 gene, crucial for leaf serration formation. Finally, we developed a user-friendly web-based tool encompassing the multi-omics resources associated with the Populus super-pangenome (http://www.populus-superpangenome.com/). Together, the present pioneering super-pangenome resource in forest trees not only aid in the advancement of breeding efforts of this globally important tree genus but also offer valuable insights into potential avenues for comprehending tree biology.}, } @article {pmid38478130, year = {2024}, author = {Wisal, A and Saeed, N and Aurongzeb, M and Shafique, M and Sohail, S and Anwar, W and Basharat, Z and Irfan, M and Ullah, A and Hassan, SS}, title = {Bridging drug discovery through hierarchical subtractive genomics against asd, trpG, and secY of pneumonia causing MDR Staphylococcus aureus.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {34}, pmid = {38478130}, issn = {1617-4623}, abstract = {Staphylococcus aureus (S. 
aureus) is an opportunistic gram-positive, non-motile, and non-sporulating bacteria that induces pneumonia, a provocative lung infection affecting mainly the terminal bronchioles and the small air sacs known as alveoli. Recently, it has developed antibiotic resistance to the available consortium as per the WHO reports; thereby, novel remedial targets and resilient medications to forestall and cure this illness are desperately needed. Here, using pan-genomics, a total of 1,387 core proteins were identified. Subtractive proteome analyses further identified 12 proteins that are vital for bacteria. One membrane protein (secY) and two cytoplasmic proteins (asd and trpG) were chosen as possible therapeutic targets concerning minimum % host identity, essentiality, and other cutoff values, such as high resistance in the MDR S. aureus. The UniProt AA sequences of the selected targets were modelled and docked against 3 drug-like chemical libraries. The top-ranked compounds i.e., ZINC82049692, ZINC85492658 and 3a of Isosteviol derivative for Aspartate-semialdehyde dehydrogenase (asd); ZINC38222743, ZINC70455378, and 5 m Isosteviol derivative for Anthranilate synthase component II (trpG); and finally, ZINC72292296, ZINC85632684, and 7 m Isosteviol derivative for Protein translocase subunit secY (secY), were further subjected to molecular dynamics studies for thermodynamic stability and energy calculation. Our study proposes new therapeutic targets in S. aureus, some of which have previously been reported in other pathogenic microorganisms. Owing to further experimental validation, we anticipate that the adapted methodology and the predicted results in this work could make major contributions towards novel drug discovery and their targets in S. 
aureus caused pneumonia.}, } @article {pmid38472486, year = {2024}, author = {Martínez-Gallardo, MJ and Villicaña, C and Yocupicio-Monroy, M and Alcaraz-Estrada, SL and Salazar-Salinas, J and Mendoza-Vázquez, OF and Damazo-Hernández, G and León-Félix, J}, title = {Comparative genomic analysis of Pseudomonas aeruginosa strains susceptible and resistant to carbapenems and aztreonam isolated from patients with healthcare-associated infections in a Mexican hospital.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {29}, pmid = {38472486}, issn = {1617-4623}, support = {E05//Ciencia y Tecnología ISSSTE/ ; }, abstract = {Pseudomonas aeruginosa (PA) is an important opportunistic pathogen that causes different infections on immunocompromised patients. Within PA accessory genome, differences in virulence, antibiotic resistance and biofilm formation have been described between strains, leading to the emergence of multidrug-resistant strains. The genome sequences of 17 strains isolated from patients with healthcare-associated infections in a Mexican hospital were genomically and phylogenetically analyzed and antibiotic resistance genes, virulence genes, and biofilm formation genes were detected. Fifteen of the 17 strains were resistant to at least two of the carbapenems meropenem, imipenem, and the monobactam aztreonam. The antibiotic resistance (mexA, mexB, and oprM) and the biofilm formation (pslA and pslD) genes were detected in all strains. Differences were found between strains in accessory genome size. The strains had different sequence types, and seven strains had sequence types associated with global high risk epidemic PA clones. All strains were represented in two groups among PA global strains. In the 17 strains, horizontally acquired resistance genes to aminoglycosides and beta-lactams were found, mainly, and between 230 and 240 genes that encode virulence factors. 
The strains under study were variable in terms of their accessory genome, antibiotic resistance, and virulence genes. With these characteristics, we provide information about the genomic diversity of clinically relevant PA strains.}, } @article {pmid38470044, year = {2024}, author = {Liu, D and Xie, L-S and Lian, S and Li, K and Yang, Y and Wang, W-Z and Hu, S and Liu, S-J and Liu, C and He, Z}, title = {Anaerostipes hadrus, a butyrate-producing bacterium capable of metabolizing 5-fluorouracil.}, journal = {mSphere}, volume = {}, number = {}, pages = {e0081623}, doi = {10.1128/msphere.00816-23}, pmid = {38470044}, issn = {2379-5042}, abstract = {UNLABELLED: Anaerostipes hadrus (A. hadrus) is a dominant species in the human gut microbiota and considered a beneficial bacterium for producing probiotic butyrate. However, recent studies have suggested that A. hadrus may negatively affect the host through synthesizing fatty acid and metabolizing the anticancer drug 5-fluorouracil, indicating that the impact of A. hadrus is complex and unclear. Therefore, comprehensive genomic studies on A. hadrus need to be performed. We integrated 527 high-quality public A. hadrus genomes and five distinct metagenomic cohorts. We analyzed these data using the approaches of comparative genomics, metagenomics, and protein structure prediction. We also performed validations with culture-based in vitro assays. We constructed the first large-scale pan-genome of A. hadrus (n = 527) and identified 5-fluorouracil metabolism genes as ubiquitous in A. hadrus genomes as butyrate-producing genes. Metagenomic analysis revealed the wide and stable distribution of A. hadrus in healthy individuals, patients with inflammatory bowel disease, and patients with colorectal cancer, with healthy individuals carrying more A. hadrus. The predicted high-quality protein structure indicated that A. 
hadrus might metabolize 5-fluorouracil by producing bacterial dihydropyrimidine dehydrogenase (encoded by the preTA operon). Through in vitro assays, we validated the short-chain fatty acid production and 5-fluorouracil metabolism abilities of A. hadrus. We observed for the first time that A. hadrus can convert 5-fluorouracil to α-fluoro-β-ureidopropionic acid, which may result from the combined action of the preTA operon and adjacent hydA (encoding bacterial dihydropyrimidinase). Our results offer novel understandings of A. hadrus, exceptionally functional features, and potential applications.

IMPORTANCE: This work provides new insights into the evolutionary relationships, functional characteristics, prevalence, and potential applications of Anaerostipes hadrus.}, } @article {pmid38469580, year = {2024}, author = {Yakubu, B and Appiah, EM and Adu, AF}, title = {Pangenome Analysis of Helicobacter pylori Isolates from Selected Areas of Africa Indicated Diverse Antibiotic Resistance and Virulence Genes.}, journal = {International journal of genomics}, volume = {2024}, number = {}, pages = {5536117}, pmid = {38469580}, issn = {2314-4378}, abstract = {The challenge facing Helicobacter pylori (H. pylori) infection management in some parts of Africa is the evolution of drug-resistant species, the lack of gold standard in diagnostic methods, and the ineffectiveness of current vaccines against the bacteria. It is being established that even though clinical consequences linked to the bacteria vary geographically, there is rather a generic approach to treatment. This situation has remained problematic in the successful fight against the bacteria in parts of Africa. As a result, this study compared the genomes of selected H. pylori isolates from selected areas of Africa and evaluated their virulence and antibiotic drug resistance, those that are highly pathogenic and are associated with specific clinical outcomes and those that are less virulent and rarely associated with clinical outcomes. 146 genomes of H. pylori isolated from selected locations of Africa were sampled, and bioinformatic tools such as Abricate, CARD RGI, MLST, Prokka, Roary, Phandango, Google Sheets, and iTOLS were used to compare the isolates and their antibiotic resistance or susceptibility. Over 20 k virulence and AMR genes were observed. About 95% of the isolates were genetically diverse, 90% of the isolates harbored shell genes, and 50% harbored cloud and core genes. Some isolates did not retain the cagA and vacA genes. 
Clarithromycin, metronidazole, amoxicillin, and tinidazole were resistant to most AMR genes (vacA, cagA, oip, and bab). Conclusion. This study found both virulence and AMR genes in all H. pylori strains in all the selected geographies around Africa with differing quantities. MLST, Pangenome, and ORF analyses showed disparities among the isolates. This in general could imply diversities in terms of genetics, evolution, and protein production. Therefore, generic administration of antibiotics such as clarithromycin, amoxicillin, and erythromycin as treatment methods in the African subregion could be contributing to the spread of the bacterium's antibiotic resistance.}, } @article {pmid38463963, year = {2024}, author = {Young, MG and Straub, TJ and Worby, CJ and Metsky, HC and Gnirke, A and Bronson, RA and van Dijk, LR and Desjardins, CA and Matranga, C and Qu, J and Dodson, K and Schreiber, HL and Manson, AL and Hultgren, SJ and Earl, AM}, title = {Distinct Escherichia coli transcriptional profiles in the guts of recurrent UTI sufferers revealed by pan-genome hybrid selection.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.29.582780}, pmid = {38463963}, abstract = {Low-abundance members of microbial communities are difficult to study in their native habitat. This includes Escherichia coli, a minor, but common inhabitant of the gastrointestinal tract and opportunistic pathogen, including of the urinary tract, where it causes most infections. While our understanding of the interactions between uropathogenic Escherichia coli (UPEC) and the bladder is increasing, comparatively little is known about UPEC in its pre-infection reservoir, partly due to its low abundance there (<1% relative abundance). In order to specifically and sensitively explore the genomes and transcriptomes of diverse E. coli from gastrointestinal communities, we developed E. coli PanSelect, a set of probes designed to enrich E. 
coli's broad pangenome. First we demonstrated the ability of PanSelect to enrich diverse strains in an unbiased way using a mock community of known composition. Then we enriched E. coli DNA and RNA from human stool microbiomes by 158 and 30-fold, respectively. We also used E. coli PanSelect to explore the gene content and transcriptome of E. coli within the gut microbiomes of women with history of recurrent urinary tract infection (rUTI), finding differential regulation of pathways that suggests that the rUTI gut environment promotes respiratory vs fermentative metabolism. E. coli PanSelect technology holds promise for investigations of native in vivo biology of diverse E. coli in the gut and other environments, where it is a minor component of the microbial community, using unbiased, culture-free shotgun sequencing. This method could also be generally applied to other highly diverse, low abundance bacteria.}, } @article {pmid38463499, year = {2024}, author = {Li, H and Marin, M and Farhat, MR}, title = {Exploring gene content with pangenome gene graphs.}, journal = {ArXiv}, volume = {}, number = {}, pages = {}, pmid = {38463499}, issn = {2331-8422}, abstract = {MOTIVATION: The gene content regulates the biology of an organism. It varies between species and between individuals of the same species. Although tools have been developed to identify gene content changes in bacterial genomes, none is applicable to collections of large eukaryotic genomes such as the human pangenome.

RESULTS: We developed pangene, a computational tool to identify gene orientation, gene order and gene copy-number changes in a collection of genomes. Pangene aligns a set of input protein sequences to the genomes, resolves redundancies between protein sequences and constructs a gene graph with each genome represented as a walk in the graph. It additionally finds subgraphs that encode gene content changes. Applied to the human pangenome, pangene identifies known gene-level variations and reveals complex haplotypes that are not well studied before. Pangene also works with high-quality bacterial pangenome and reports similar numbers of core and accessory genes in comparison to existing tools.

Source code at https://github.com/lh3/pangene; pre-built pangene graphs can be downloaded from https://zenodo.org/records/8118576 and visualized at https://pangene.bioinweb.org.}, } @article {pmid38461665, year = {2024}, author = {Feng, NX and Li, DW and Zhang, F and Bin, H and Huang, YT and Xiang, L and Liu, BL and Cai, QY and Li, YW and Xu, DL and Xie, Y and Mo, CH}, title = {Biodegradation of phthalate acid esters and whole-genome analysis of a novel Streptomyces sp. FZ201 isolated from natural habitats.}, journal = {Journal of hazardous materials}, volume = {469}, number = {}, pages = {133972}, doi = {10.1016/j.jhazmat.2024.133972}, pmid = {38461665}, issn = {1873-3336}, abstract = {Di-n-butyl phthalate (DBP) is one of the most extensively used phthalic acid esters (PAEs) and is considered to be an emerging, globally concerning pollutant. The genus Streptomyces holds promise as a degrader of various organic pollutants, but PAE biodegradation mechanisms by Streptomyces species remain unsolved. In this study, a novel PAE-degrading Streptomyces sp. FZ201 isolated from natural habitats efficiently degraded various PAEs. FZ201 had strong resilience against DBP and exhibited immediate degradation, with kinetics adhering to a first-order model. The comprehensive biodegradation of DBP involves de-esterification, β-oxidation, trans-esterification, and aromatic ring cleavage. FZ201 contains numerous catabolic genes that potentially facilitate PAE biodegradation. The DBP metabolic pathway was reconstructed by genome annotation and intermediate identification. Streptomyces species have an open pangenome with substantial genome expansion events during the evolutionary process, enabling extensive genetic diversity and highly plastic genomes within the Streptomyces genus. FZ201 had a diverse array of highly expressed genes associated with the degradation of PAEs, potentially contributing significantly to its adaptive advantage and efficiency of PAE degradation. 
Thus, FZ201 is a promising candidate for remediating highly PAE-contaminated environments. These findings enhance our preliminary understanding of the molecular mechanisms employed by Streptomyces for the removal of PAEs.}, } @article {pmid38459435, year = {2024}, author = {Zhu, L and Liu, H and Li, X and Shi, Y and Yin, X and Pi, X}, title = {Whole-genome sequencing and analysis of Chryseobacterium arthrosphaerae from Rana nigromaculata.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {80}, pmid = {38459435}, issn = {1471-2180}, support = {2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; 2022SNJF072//Zhejiang Provincial Science and Technology Cooperation Plan of "Three Rural Areas and Nine Rural Areas"/ ; }, abstract = {Chryseobacterium arthrosphaerae strain FS91703 was isolated from Rana nigromaculata in our previous study. To investigate the genomic characteristics, pathogenicity-related genes, antimicrobial resistance, and phylogenetic relationship of this strain, PacBio RS II and Illumina HiSeq 2000 platforms were used for the whole genome sequencing. The genome size of strain FS91703 was 5,435,691 bp and GC content was 37.78%. A total of 4,951 coding genes were predicted; 99 potential virulence factors homologs were identified. 
Analysis of antibiotic resistance genes revealed that strain FS91703 harbored 10 antibiotic resistance genes in 6 categories and 2 multidrug-resistant efflux pump genes, including adeG and farA. Strain FS91703 was sensitive to β-lactam combination drugs, cephem, monobactam and carbapenems, intermediately resistant to phenicol, and resistant to penicillin, aminoglycosides, tetracycline, fluoroquinolones, and folate pathway inhibitors. Phylogenetic analysis revealed that strain FS91703 and C. arthrosphaerae CC-VM-7[T] were on the same branch of the phylogenetic tree based on 16 S rRNA; the ANI value between them was 96.99%; and the DDH values were 80.2, 72.2 and 81.6% by three default calculation formulae. These results suggested that strain FS91703 was a species of C. arthrosphaerae. Pan-genome analysis showed FS91703 had 566 unique genes compared with 13 other C. arthrosphaerae strains, and had a distant phylogenetic relationship with the other C. arthrosphaerae strains of the same branch in phylogenetic tree based on orthologous genes. The results of this study suggest that strain FS91703 is a multidrug-resistant and highly virulent bacterium, that differs from other C. arthrosphaerae strains at the genomic level. The knowledge about the genomic characteristics and antimicrobial resistance of strain FS91703 provides valuable insights into this rare species, as well as guidance for the treatment of the disease caused by FS91703 in Rana nigromaculata.}, } @article {pmid38450165, year = {2024}, author = {Zhou, Y and Tu, T and Yao, X and Luo, Y and Yang, Z and Ren, M and Zhang, G and Yu, Y and Lu, A and Wang, Y}, title = {Pan-genome analysis of Streptococcus suis serotype 2 highlights genes associated with virulence and antibiotic resistance.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1362316}, pmid = {38450165}, issn = {1664-302X}, abstract = {Streptococcus suis serotype 2 (SS2) is a Gram-positive bacterium. 
It is a common and significant pathogen in pigs and a common cause of zoonotic meningitis in humans. It can lead to sepsis, endocarditis, arthritis, and pneumonia. If not diagnosed and treated promptly, it has a high mortality rate. The pan-genome of SS2 is open, and with an increasing number of genes, the core genome and accessory genome may exhibit more pronounced differences. Due to the diversity of SS2, the genes related to its virulence and resistance are still unclear. In this study, a strain of SS2 was isolated from a pig farm in Sichuan Province, China, and subjected to whole-genome sequencing and characterization. Subsequently, we conducted a Pan-Genome-Wide Association Study (Pan-GWAS) on 230 strains of SS2. Our analysis indicates that the core genome is composed of 1,458 genes related to the basic life processes of the bacterium. The accessory genome, consisting of 4,337 genes, is highly variable and a major contributor to the genetic diversity of SS2. Furthermore, we identified important virulence and resistance genes in SS2 through pan-GWAS. The virulence genes of SS2 are mainly associated with bacterial adhesion. In addition, resistance genes in the core genome may confer natural resistance of SS2 to fluoroquinolone and glycopeptide antibiotics. This study lays the foundation for further research on the virulence and resistance of SS2, providing potential new drug and vaccine targets against SS2.}, } @article {pmid38448140, year = {2024}, author = {Mathur, S and Singh, D and Ranjan, R}, title = {Recent advances in plant translational genomics for crop improvement.}, journal = {Advances in protein chemistry and structural biology}, volume = {139}, number = {}, pages = {335--382}, doi = {10.1016/bs.apcsb.2023.11.009}, pmid = {38448140}, issn = {1876-1631}, abstract = {The growing population, climate change, and limited agricultural resources put enormous pressure on agricultural systems. 
A plateau in crop yields is occurring and extreme weather events and urbanization threaten the livelihood of farmers. It is imperative that immediate attention is paid to addressing the increasing food demand, ensuring resilience against emerging threats, and meeting the demand for more nutritious, safer food. Under uncertain conditions, it is essential to expand genetic diversity and discover novel crop varieties or variations to develop higher and more stable yields. Genomics plays a significant role in developing abundant and nutrient-dense food crops. An alternative to traditional breeding approach, translational genomics is able to improve breeding programs in a more efficient and precise manner by translating genomic concepts into practical tools. Crop breeding based on genomics offers potential solutions to overcome the limitations of conventional breeding methods, including improved crop varieties that provide more nutritional value and are protected from biotic and abiotic stresses. Genetic markers, such as SNPs and ESTs, contribute to the discovery of QTLs controlling agronomic traits and stress tolerance. In order to meet the growing demand for food, there is a need to incorporate QTLs into breeding programs using marker-assisted selection/breeding and transgenic technologies. This chapter primarily focuses on the recent advances that are made in translational genomics for crop improvement and various omics techniques including transcriptomics, metagenomics, pangenomics, single cell omics etc. 
Numerous genome editing techniques including CRISPR Cas technology and their applications in crop improvement had been discussed.}, } @article {pmid38439049, year = {2024}, author = {Chen, C and Wu, S and Sun, Y and Zhou, J and Chen, Y and Zhang, J and Birchler, JA and Han, F and Yang, N and Su, H}, title = {Three near-complete genome assemblies reveal substantial centromere dynamics from diploid to tetraploid in Brachypodium genus.}, journal = {Genome biology}, volume = {25}, number = {1}, pages = {63}, pmid = {38439049}, issn = {1474-760X}, support = {2021YFF1000800//National Key Research and Development Program of China/ ; 32170571//National Natural Science Foundation of China/ ; 2021ZKPY008//Fundamental Research Funds for the Central Universities/ ; No. B21HJ0504//Hainan Yazhou Bay Seed Laboratory/ ; }, abstract = {BACKGROUND: Centromeres are critical for maintaining genomic stability in eukaryotes, and their turnover shapes genome architectures and drives karyotype evolution. However, the co-evolution of centromeres from different species in allopolyploids over millions of years remains largely unknown.

RESULTS: Here, we generate three near-complete genome assemblies, a tetraploid Brachypodium hybridum and its two diploid ancestors, Brachypodium distachyon and Brachypodium stacei. We detect high degrees of sequence, structural, and epigenetic variations of centromeres at base-pair resolution between closely related Brachypodium genomes, indicating the appearance and accumulation of species-specific centromere repeats from a common origin during evolution. We also find that centromere homogenization is accompanied by local satellite repeats bursting and retrotransposon purging, and the frequency of retrotransposon invasions drives the degree of interspecies centromere diversification. We further investigate the dynamics of centromeres during alloploidization process, and find that dramatic genetics and epigenetics architecture variations are associated with the turnover of centromeres between homologous chromosomal pairs from diploid to tetraploid. Additionally, our pangenomes analysis reveals the ongoing variations of satellite repeats and stable evolutionary homeostasis within centromeres among individuals of each Brachypodium genome with different polyploidy levels.

CONCLUSIONS: Our results provide unprecedented information on the genomic, epigenomic, and functional diversity of highly repetitive DNA between closely related species and their allopolyploid genomes at both coarse and fine scale.}, } @article {pmid38438804, year = {2024}, author = {Niu, D and Feng, N and Xi, S and Xu, J and Su, Y}, title = {Genomics-based analysis of four porcine-derived lactic acid bacteria strains and their evaluation as potential probiotics.}, journal = {Molecular genetics and genomics : MGG}, volume = {299}, number = {1}, pages = {24}, pmid = {38438804}, issn = {1617-4623}, support = {2022YFD1300402//Key Technologies Research and Development Program/ ; 31872362//National Natural Science Foundation of China/ ; 32072688//National Natural Science Foundation of China/ ; }, abstract = {The search for probiotics and exploration of their functions are crucial for livestock farming. Recently, porcine-derived lactic acid bacteria (LAB) have shown great potential as probiotics. However, research on the evaluation of porcine-derived LAB as potential probiotics through genomics-based analysis is relatively limited. The present study analyzed four porcine-derived LAB strains (Lactobacillus johnsonii L16, Latilactobacillus curvatus ZHA1, Ligilactobacillus salivarius ZSA5 and Ligilactobacillus animalis ZSB1) using genomic techniques and combined with in vitro tests to evaluate their potential as probiotics. The genome sizes of the four strains ranged from 1,897,301 bp to 2,318,470 bp with the GC contents from 33.03 to 41.97%. Pan-genomic analysis and collinearity analysis indicated differences among the genomes of four strains. Carbohydrate active enzymes analysis revealed that L. johnsonii L16 encoded more carbohydrate active enzymes than other strains. KEGG pathway analysis and in vitro tests confirmed that L. johnsonii L16 could utilize a wide range of carbohydrates and had good utilization capacity for each carbohydrate. 
The four strains had genes related to acid tolerance and were tolerant to low pH, with L. johnsonii L16 showing the greatest tolerance. The four strains contained genes related to bile salt tolerance and were able to tolerate 0.1% bile salt. Four strains had antioxidant related genes and exhibited antioxidant activity in in vitro tests. They contained the genes linked with organic acid biosynthesis and exhibited antibacterial activity against enterotoxigenic Escherichia coli K88 (ETEC K88) and Salmonella 6,7:c:1,5, wherein, L. johnsonii L16 and L. salivarius ZSA5 had gene clusters encoding bacteriocin. Results suggest that genome analysis combined with in vitro tests is an effective approach for evaluating different strains as probiotics. The findings of this study indicate that L. johnsonii L16 has the potential as a probiotic strain among the four strains and provide theoretical basis for the development of probiotics in swine production.}, } @article {pmid38421269, year = {2024}, author = {Deery, J and Carmody, M and Flavin, R and Tomanek, M and O'Keeffe, M and McGlacken, GP and Reen, FJ}, title = {Comparative genomics reveals distinct diversification patterns among LysR-type transcriptional regulators in the ESKAPE pathogen Pseudomonas aeruginosa.}, journal = {Microbial genomics}, volume = {10}, number = {2}, pages = {}, doi = {10.1099/mgen.0.001205}, pmid = {38421269}, issn = {2057-5858}, mesh = {Humans ; Pseudomonas aeruginosa/genetics ; Genomics ; *Pseudomonas Infections ; Pseudomonas ; *Cystic Fibrosis/genetics ; }, abstract = {Pseudomonas aeruginosa, a harmful nosocomial pathogen associated with cystic fibrosis and burn wounds, encodes for a large number of LysR-type transcriptional regulator proteins. To understand how and why LTTR proteins evolved with such frequency and to establish whether any relationships exist within the distribution we set out to identify the patterns underpinning LTTR distribution in P. 
aeruginosa and to uncover cluster-based relationships within the pangenome. Comparative genomic studies revealed that in the JGI IMG database alone ~86 000 LTTRs are present across the sequenced genomes (n=699). They are widely distributed across the species, with core LTTRs present in >93 % of the genomes and accessory LTTRs present in <7 %. Analysis showed that subsets of core LTTRs can be classified as either variable (typically specific to P. aeruginosa) or conserved (and found to be distributed in other Pseudomonas species). Extending the analysis to the more extensive Pseudomonas database, PA14 rooted analysis confirmed the diversification patterns and revealed PqsR, the receptor for the Pseudomonas quinolone signal (PQS) and 2-heptyl-4-quinolone (HHQ) quorum-sensing signals, to be amongst the most variable in the dataset. Successful complementation of the PAO1 pqsR [-] mutant using representative variant pqsR sequences suggests a degree of structural promiscuity within the most variable of LTTRs, several of which play a prominent role in signalling and communication. These findings provide a new insight into the diversification of LTTR proteins within the P. 
aeruginosa species and suggests a functional significance to the cluster, conservation and distribution patterns identified.}, } @article {pmid38421062, year = {2024}, author = {Ji, G and Long, Y and Cai, G and Wang, A and Yan, G and Li, H and Gao, G and Xu, K and Huang, Q and Chen, B and Li, L and Li, F and Nishio, T and Shen, J and Wu, X}, title = {The chromosome-scale genome of wild Brassica oleracea provides insights into the domestication of Brassica plants.}, journal = {Journal of experimental botany}, volume = {}, number = {}, pages = {}, doi = {10.1093/jxb/erae079}, pmid = {38421062}, issn = {1460-2431}, abstract = {The cultivated diploid Brassica oleracea is an important vegetable crop, but the genetic basis of domestication remains largely unclear without high-quality reference genomes of wild B. oleracea. Here, we report the first chromosome-level assembly of the wild Brassica oleracea L. W03 genome, (total genome size, 630.7 Mb; scaffold N50, 64.6 Mb). Using newly assembled W03 genome, we constructed a gene-based B. oleracea pangenome and identified 29,744 core genes, 23,306 dispensable genes, and 1,896 private genes. We resequenced 53 accessions, which represent six potential wild B. oleracea progenitor species. The results of the population genomic analysis showed that wild B. oleracea population had the highest level of diversity and represented the more closely related population of horticultural B. oleracea. Additionally, the WUSCHEL gene was found to play a decisive role in domestication and to be involved in cauliflower and broccoli curd formation. We also illustrate the loss of disease resistance genes during domestication selection. Our results provide deep insights into B. 
oleracea domestication and will facilitate Brassica crop genetic improvement.}, } @article {pmid38418560, year = {2024}, author = {Chao, P and Zhang, X and Zhang, L and Yang, A and Wang, Y and Chen, X}, title = {Proteomics-based vaccine targets annotation and design of multi-epitope vaccine against antibiotic-resistant Streptococcus gallolyticus.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {4836}, pmid = {38418560}, issn = {2045-2322}, abstract = {Streptococcus gallolyticus is a non-motile, gram-positive bacterium that causes infective endocarditis. S. gallolyticus has developed resistance to existing antibiotics, and no vaccine is currently available. Therefore, it is essential to develop an effective S. gallolyticus vaccine. Core proteomics was used in this study together with subtractive proteomics and reverse vaccinology approach to find antigenic proteins that could be utilized for the design of the S. gallolyticus multi-epitope vaccine. The pipeline identified two antigenic proteins as potential vaccine targets: penicillin-binding protein and the ATP synthase subunit. T and B cell epitopes from the specific proteins were forecasted employing several immunoinformatics and bioinformatics resources. A vaccine (360 amino acids) was created using a combination of seven cytotoxic T cell lymphocyte (CTL), three helper T cell lymphocyte (HTL), and five linear B cell lymphocyte (LBL) epitopes. To increase immune responses, the vaccine was paired with a cholera enterotoxin subunit B (CTB) adjuvant. The developed vaccine was highly antigenic, non-allergenic, and stable for human use. The vaccine's binding affinity and molecular interactions with the human immunological receptor TLR4 were studied using molecular mechanics/generalized Born surface area (MMGBSA), molecular docking, and molecular dynamic (MD) simulation analyses. 
Escherichia coli (strain K12) plasmid vector pET-28a (+) was used to examine the ability of the vaccine to be expressed. According to the outcomes of these computer experiments, the vaccine is quite promising in terms of developing a protective immunity against diseases. However, in vitro and animal research are required to validate our findings.}, } @article {pmid38417638, year = {2024}, author = {Banerjee, R and Robinson, SM and Lahiri, A and Verma, P and Banerjee, AK and Basak, S and Basak, K and Paul, S}, title = {Exploring the resistome and virulome in major sequence types of Acinetobacter baumannii genomes: Correlations with genome divergence and sequence types.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {}, number = {}, pages = {105579}, doi = {10.1016/j.meegid.2024.105579}, pmid = {38417638}, issn = {1567-7257}, abstract = {The increasing global prevalence of antimicrobial resistance in Acinetobacter baumannii has led to concerns regarding the effectiveness of infection treatment. Moreover, the critical role of virulence factor genes in A. baumannii's pathogenesis and its propensity to cause severe disease is of particular importance. Comparative genomics, including multi-locus sequence typing (MLST), enhances our understanding of A. baumannii epidemiology. While there is substantial documentation on A. baumannii, a comprehensive study of the antibiotic-resistant mechanisms and the virulence factors contributing to pathogenesis, and their correlation with Sequence Types (STs) remains incompletely elucidated. In this study, we aim to explore the relationship between antimicrobial resistance genes, virulence factor genes, and STs using genomic data from 223 publicly available A. baumannii strains. The core phylogeny analysis revealed five predominant STs in A. baumannii genomes, linked to their geographical sources of isolation. 
Furthermore, the resistome and virulome of A. baumannii followed an evolutionary pattern consistent with their pan-genome evolution. Among the major STs, we observed significant variations in resistant genes against "aminoglycoside" and "sulphonamide" antibiotics, highlighting the role of genotypic variations in determining resistance profiles. Furthermore, the presence of virulence factor genes, particularly exotoxin and nutritional / metabolic factor genes, played a crucial role in distinguishing the major STs, suggesting a potential link between genetic makeup and pathogenicity. Understanding these associations can provide valuable insights into A. baumannii's virulence potential and clinical outcomes, enabling the development of effective strategies to combat infections caused by this opportunistic pathogen.}, } @article {pmid38415665, year = {2024}, author = {Guillén, R and Salinas, C and Mendoza-Álvarez, A and Rubio Rodríguez, LA and Díaz-de Usera, A and Lorenzo-Salazar, JM and González-Montelongo, R and Flores, C and Rodríguez, F}, title = {Genomic epidemiology of the primary methicillin-resistant Staphylococcus aureus clones causing invasive infections in Paraguayan children.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0301223}, doi = {10.1128/spectrum.03012-23}, pmid = {38415665}, issn = {2165-0497}, abstract = {UNLABELLED: Methicillin-resistant Staphylococcus aureus (MRSA) is one of the major human pathogens. It could carry numerous resistance genes and virulence factors in its genome, some of which are related to the severity of the infection. An observational, descriptive, cross-sectional study was designed to molecularly analyze MRSA isolates that cause invasive infections in Paraguayan children from 2009 to 2013. Ten representative MRSA isolates of the main clonal complex identified were analyzed with short-read paired-end sequencing and assessed for the virulome, resistome, and phylogenetic relationships. 
All the genetically linked MRSA isolates were recovered from diverse clinical sources, patients, and hospitals at broad gap periods. The pan-genomic analysis of these clones revealed three major and different clonal complexes (CC30, CC5, and CC8), each composed of clones closely related to each other. The CC30 genomes prove to be a successful clone, strongly installed and disseminated throughout our country, and closely related to other CC30 public genomes from the region and the world. The CC5 shows the highest genetic variability, and the CC8 carried the complete arginine catabolic mobile element (ACME), closely related to the USA300-NAE-ACME+, identified as the major cause of CA-MRSA infections in North America. Multiple virulence and resistance genes were identified for the first time in this study, highlighting the complex virulence profiles of MRSA circulating in the country. This study opens a wide range of new possibilities for future projects and trials to improve the existing knowledge on the epidemiology of MRSA circulating in Paraguay.

IMPORTANCE: The increasing prevalence of methicillin-resistant Staphylococcus aureus (MRSA) is a public health problem worldwide. The most frequent MRSA clones identified in Paraguay in previous studies (including community and hospital acquired) were the Pediatric (CC5-ST5-IV), the Cordobes-Chilean (CC5-ST5-I), the SouthWest Pacific (CC30-ST30-IV), and the Brazilian (CC8-ST239-III) clones. In this study, the pan-genomic analysis of the most representative MRSA clones circulating in invasive infection in Paraguayan children over the years 2009-2013, such as the CC30-ST30-IV, CC5-ST5-IV, and CC8-ST8-IV, was carried out to evaluate their genetic diversity, their repertoire of virulence factors, and antimicrobial resistance determinants. This revealed multiple virulence and resistance genes, highlighting the complex virulence profiles of MRSA circulating in Paraguay. Our work is the first genomic study of MRSA in Paraguay and will contribute to the development of genomic surveillance in the region and our understanding of the global epidemiology of this pathogen.}, } @article {pmid38413855, year = {2024}, author = {Wang, H and Xia, F and Xia, Y and Li, J and Hu, Y and Deng, Y and Zou, M}, title = {Pangenome analysis of Shewanella xiamenensis revealed important genetic traits concerning genetic diversity, pathogenicity and antibiotic resistance.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {216}, pmid = {38413855}, issn = {1471-2164}, support = {No. 2023JJ30942//Natural Science Foundation of Hunan Province/ ; }, abstract = {BACKGROUND: Shewanella xiamenensis, widely distributed in natural environments, has long been considered as opportunistic pathogen. Recently, significant changes in the resistance spectrum have been observed in S. xiamenensis, due to acquired antibiotic resistance genes. Therefore, a pan-genome analysis was conducted to illuminate the genomic changes in S. xiamenensis.

RESULTS: Phylogenetic analysis revealed three major clusters and three singletons, among which close relationship between several strains was discovered, regardless of their host and niches. The "open" genomes with diversity of accessory and strain-specific genomes took advantage towards diversity environments. The purifying selection pressure was the main force on genome evolution, especially in conservative genes. Only 53 gene families were under positive selection pressure. Phenotypic resistance analysis revealed 21 strains were classified as multi-drug resistance (MDR). Ten types of antibiotic resistance genes and two heavy metal resistance operons were discovered in S. xiamenensis. Mobile genetic elements and horizontal gene transfer increased genome diversity and were closely related to MDR strains. S. xiamenensis carried a variety of virulence genes and macromolecular secretion systems, indicating their important roles in pathogenicity and adaptability. Type IV secretion system was discovered in 15 genomes with various sequence structures, indicating it was originated from different donors through horizontal gene transfer.

CONCLUSIONS: This study provided with a detailed insight into the changes in the pan-genome of S. xiamenensis, highlighting its capability to acquire new mobile genetic elements and resistance genes for its adaptation to environment and pathogenicity to human and animals.}, } @article {pmid38413611, year = {2024}, author = {Go, S and Koo, H and Jung, M and Hong, S and Yi, G and Kim, YM}, title = {Pan-chloroplast genomes for accession-specific marker development in Hibiscus syriacus.}, journal = {Scientific data}, volume = {11}, number = {1}, pages = {246}, pmid = {38413611}, issn = {2052-4463}, abstract = {Hibiscus syriacus L. is a renowned ornamental plant. We constructed 95 chloroplast genomes of H. syriacus L. cultivars using a short-read sequencing platform (Illumina) and a long-read sequencing platform (Oxford Nanopore Technology). The following genome assembly, we delineate quadripartite structures encompassing large single-copy, small single-copy, and inverted repeat (IRa and IRb) regions, from 160,231 bp to 161,041 bp. Our comprehensive analyses confirmed the presence of 79 protein-coding genes, 30 tRNA genes, and 4 rRNA genes in the pan-chloroplast genome, consistent with prior research on the H. syriacus chloroplast genome. Subsequent pangenome analysis unveiled widespread genome sequence conservation alongside unique cultivar-specific variant patterns consisting of 193 single-nucleotide polymorphisms and 61 insertions or deletions. The region containing intra-species variant patterns, as identified in this study, has the potential to develop accession-specific molecular markers, enhancing precision in cultivar classification. These findings are anticipated to drive advancements in breeding strategies, augment biodiversity, and unlock the agricultural potential inherent in H. 
syriacus.}, } @article {pmid38412041, year = {2024}, author = {Dong, X and Jia, H and Yu, Y and Xiang, Y and Zhang, Y}, title = {Genomic revisitation and reclassification of the genus Providencia.}, journal = {mSphere}, volume = {}, number = {}, pages = {e0073123}, doi = {10.1128/msphere.00731-23}, pmid = {38412041}, issn = {2379-5042}, abstract = {Members of Providencia, although typically opportunistic, can cause severe infections in immunocompromised hosts. Recent advances in genome sequencing provide an opportunity for more precise study of this genus. In this study, we first identified and characterized a novel species named Providencia zhijiangensis sp. nov. It has ≤88.23% average nucleotide identity (ANI) and ≤31.8% in silico DNA-DNA hybridization (dDDH) values with all known Providencia species, which fall significantly below the species-defining thresholds. Interestingly, we found that Providencia stuartii and Providencia thailandensis actually fall under the same species, evidenced by an ANI of 98.59% and a dDDH value of 90.4%. By fusing ANI with phylogeny, we have reclassified 545 genomes within this genus into 20 species, including seven unnamed taxa (provisionally titled Taxon 1-7), which can be further subdivided into 23 lineages. Pangenomic analysis identified 1,550 genus-core genes in Providencia, with coenzymes being the predominant category at 10.56%, suggesting significant intermediate metabolism activity. Resistance analysis revealed that most lineages of the genus (82.61%, 19/23) carry a high number of antibiotic-resistance genes (ARGs) and display diverse resistance profiles. Notably, the majority of ARGs are located on plasmids, underscoring the significant role of plasmids in the resistance evolution within this genus. Three species or lineages (P. stuartii, Taxon 3, and Providencia hangzhouensis L12) that possess the highest number of carbapenem-resistance genes suggest their potential influence on clinical treatment. 
These findings underscore the need for continued surveillance and study of this genus, particularly due to their role in harboring antibiotic-resistance genes. IMPORTANCE: The Providencia genus, known to harbor opportunistic pathogens, has been a subject of interest due to its potential to cause severe infections, particularly in vulnerable individuals. Our research offers groundbreaking insights into this genus, unveiling a novel species, Providencia zhijiangensis sp. nov., and highlighting the need for a re-evaluation of existing classifications. Our comprehensive genomic assessment offers a detailed classification of 545 genomes into distinct species and lineages, revealing the rich biodiversity and intricate species diversity within the genus. The substantial presence of antibiotic-resistance genes in the Providencia genus underscores potential challenges for public health and clinical treatments. Our study highlights the pressing need for increased surveillance and research, enriching our understanding of antibiotic resistance in this realm.}, } @article {pmid38412007, year = {2024}, author = {Kim, M and Kim, W and Park, Y and Jung, J and Park, W}, title = {Lineage-specific evolution of Aquibium, a close relative of Mesorhizobium, during habitat adaptation.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0209123}, doi = {10.1128/aem.02091-23}, pmid = {38412007}, issn = {1098-5336}, abstract = {The novel genus Aquibium that lacks nitrogenase was recently reclassified from the Mesorhizobium genus. The genomes of Aquibium species isolated from water were smaller and had higher GC contents than those of Mesorhizobium species. Six Mesorhizobium species lacking nitrogenase were found to exhibit low similarity in the average nucleotide identity values to the other 24 Mesorhizobium species. Therefore, they were classified as the non-N2-fixing Mesorhizobium lineage (N-ML), an evolutionary intermediate species. 
The results of our phylogenomic analyses and the loss of Rhizobiales-specific fur/mur indicated that Mesorhizobium species may have evolved from Aquibium species through an ecological transition. Halotolerant and alkali-resistant Aquibium and Mesorhizobium microcysteis belonging to N-ML possessed many tripartite ATP-independent periplasmic transporter and sodium/proton antiporter subunits composed of seven genes (mrpABCDEFG). These genes were not present in the N2-fixing Mesorhizobium lineage (ML), suggesting that genes acquired for adaptation to highly saline and alkaline environments were lost during the evolution of ML as the habitat changed to soil. Land-to-water habitat changes in Aquibium species, close relatives of Mesorhizobium species, could have influenced their genomic evolution by the gain and loss of genes. Our study indicated that lineage-specific evolution could have played a significant role in shaping their genome architecture and conferring their ability to thrive in different habitats. IMPORTANCE: Phylogenetic analyses revealed that the Aquibium lineage (AL) and non-N2-fixing Mesorhizobium lineage (N-ML) were monophyletically grouped into distinct clusters separate from the N2-fixing Mesorhizobium lineage (ML). The N-ML, an evolutionary intermediate species having characteristics of both ancestral and descendant species, could provide a genomic snapshot of the genetic changes that occur during adaptation. Genomic analyses of AL, N-ML, and ML revealed that changes in the levels of genes related to transporters, chemotaxis, and nitrogen fixation likely reflect adaptations to different environmental conditions. 
Our study sheds light on the complex and dynamic nature of the evolution of rhizobia in response to changes in their environment and highlights the crucial role of genomic analysis in understanding these processes.}, } @article {pmid38411865, year = {2024}, author = {Seo, B and Jeon, K and Kim, WK and Jang, YJ and Cha, KH and Ko, G}, title = {Strain-Specific Anti-Inflammatory Effects of Faecalibacterium prausnitzii Strain KBL1027 in Koreans.}, journal = {Probiotics and antimicrobial proteins}, volume = {}, number = {}, pages = {}, pmid = {38411865}, issn = {1867-1314}, support = {E0170600-07//Korea Food Research Institute/ ; RS-2023-00223831//National Research Foundation of Korea/ ; }, abstract = {Faecalibacterium prausnitzii is one of the most dominant commensal bacteria in the human gut, and certain anti-inflammatory functions have been attributed to a single microbial anti-inflammatory molecule (MAM). Simultaneously, substantial diversity among F. prausnitzii strains is acknowledged, emphasizing the need for strain-level functional studies aimed at developing innovative probiotics. Here, two distinct F. prausnitzii strains, KBL1026 and KBL1027, were isolated from Korean donors, exhibiting notable differences in the relative abundance of F. prausnitzii. Both strains were identified as the core Faecalibacterium amplicon sequence variant (ASV) within the healthy Korean cohort, and their MAM sequences showed a high similarity of 98.6%. However, when a single strain was introduced to mice with dextran sulfate sodium (DSS)-induced colitis, KBL1027 showed the most significant ameliorative effects, including alleviation of colonic inflammation and restoration of gut microbial dysbiosis. Moreover, the supernatant from KBL1027 elevated the secretion of IL-10 cytokine more than that of KBL1026 in mouse bone marrow-derived macrophage (BMDM) cells, suggesting that the strain-specific, anti-inflammatory efficacy of KBL1027 might involve effector compounds other than MAM. 
Through analysis of the Faecalibacterium pan-genome and comparative genomics, strain-specific functions related to extracellular polysaccharide biosynthesis were identified in KBL1027, which could contribute to the observed morphological disparities. Collectively, our findings highlight the strain-specific, anti-inflammatory functions of F. prausnitzii, even within the same core ASV, emphasizing the influence of their human origin.}, } @article {pmid38410456, year = {2024}, author = {Kogay, R and Wolf, YI and Koonin, EV}, title = {Defense systems and horizontal gene transfer in bacteria.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.09.579689}, pmid = {38410456}, abstract = {Horizontal gene transfer (HGT) is a fundamental process in the evolution of prokaryotes, making major contributions to diversification and adaptation. Typically, HGT is facilitated by mobile genetic elements (MGEs), such as conjugative plasmids and phages that generally impose fitness costs on their hosts. However, a substantial fraction of bacterial genes is involved in defense mechanisms that limit the propagation of MGEs, raising the possibility that they can actively restrict HGT. Here we examine whether defense systems curb HGT by exploring the connections between HGT rate and the presence of 73 defense systems in 12 bacterial species. We found that only 6 defense systems, 3 of which are different CRISPR-Cas subtypes, are associated with the reduced gene gain rate on the scale of species evolution. The hosts of such defense systems tend to have a smaller pangenome size and harbor fewer phage-related genes compared to genomes lacking these systems, suggesting that these defense mechanisms inhibit HGT by limiting the integration of prophages. 
We hypothesize that restriction of HGT by defense systems is species-specific and depends on various ecological and genetic factors, including the burden of MGEs and fitness effect of HGT in bacterial populations.}, } @article {pmid38408562, year = {2024}, author = {Huy, NQ and Linh, NC and Son, NT and Ngoc, DB and Tam, TTT and Hang, LTT and Thuyet, BT and Song, LH and Van Quyen, D and Hayer, J and Bañuls, AL and Sy, BT}, title = {Genomic insights into an extensively drug-resistant and hypervirulent Burkholderia dolosa N149 isolate of a novel sequence type (ST2237) from a Vietnamese patient hospitalized for stroke.}, journal = {Journal of global antimicrobial resistance}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgar.2024.02.009}, pmid = {38408562}, issn = {2213-7173}, abstract = {OBJECTIVES: Burkholderia dolosa is a clinically important opportunistic pathogen in inpatients. Here we characterized an extensively drug-resistant and hypervirulent B. dolosa isolate from a patient hospitalized for stroke.

METHODS: Resistance to 41 antibiotics was tested with the agar disc diffusion, minimum inhibitory concentration, or broth microdilution method. The complete genome was assembled using short-reads and long-reads and the hybrid de novo assembly method. Allelic profiles obtained by multilocus sequence typing were analyzed using the PubMLST database. Antibiotic-resistance and virulence genes were predicted in silico using public databases and the "baargin" workflow. B. dolosa N149 phylogenetic relationships with all available B. dolosa strains and Burkholderia cepacia complex strains were analyzed using the pangenome obtained with Roary.

RESULTS: B. dolosa N149 displayed extensive resistance to 31 antibiotics and intermediate resistance to 4 antibiotics. The complete genome included three circular chromosomes (6,338,630 bp in total) and one plasmid (167,591 bp). Genotypic analysis revealed various gene clusters (acr, amr, amp, emr, ade, bla and tet) associated with resistance to 35 antibiotic classes. The major intrinsic resistance mechanisms were multidrug efflux pump alterations, inactivation and reduced permeability of targeted antibiotics. Moreover, 91 virulence genes (encoding proteins involved in adherence, formation of capsule, biofilm and colony, motility, phagocytosis inhibition, secretion systems, protease secretion, transmission and quorum sensing) were identified. B. dolosa N149 was assigned to a novel sequence type (ST2237) and formed a mono-phylogenetic clade separated from other B. dolosa strains.

CONCLUSION: This study provided insights into the antimicrobial resistance and virulence mechanisms of B. dolosa.}, } @article {pmid38407244, year = {2024}, author = {Selvaraj Anand, S and Wu, CT and Bremer, J and Bhatti, M and Treangen, TJ and Kalia, A and Shelburne, SA and Shropshire, WC}, title = {Identification of a novel CG307 sub-clade in third-generation-cephalosporin-resistant Klebsiella pneumoniae causing invasive infections in the USA.}, journal = {Microbial genomics}, volume = {10}, number = {2}, pages = {}, doi = {10.1099/mgen.0.001201}, pmid = {38407244}, issn = {2057-5858}, abstract = {Despite the notable clinical impact, recent molecular epidemiology regarding third-generation-cephalosporin-resistant (3GC-R) Klebsiella pneumoniae in the USA remains limited. We performed whole-genome sequencing of 3GC-R K. pneumoniae bacteraemia isolates collected from March 2016 to May 2022 at a tertiary care cancer centre in Houston, TX, USA, using Illumina and Oxford Nanopore Technologies platforms. A comprehensive comparative genomic analysis was performed to dissect population structure, transmission dynamics and pan-genomic signatures of our 3GC-R K. pneumoniae population. Of the 178 3GC-R K. pneumoniae bacteraemias that occurred during our study time frame, we were able to analyse 153 (86 %) bacteraemia isolates, 126 initial and 27 recurrent isolates. While isolates belonging to the widely prevalent clonal group (CG) 258 were rarely observed, the predominant CG, 307, accounted for 37 (29 %) index isolates and displayed a significant correlation (Pearson correlation test P value=0.03) with the annual frequency of 3GC-R K. pneumoniae bacteraemia. Interestingly, only 11 % (4/37) of CG307 isolates belonged to the commonly detected 'Texas-specific' clade that has been observed in previous Texas-based K. pneumoniae antimicrobial-resistance surveillance studies. 
We identified nearly half of our CG307 isolates (n=18) belonged to a novel, monophyletic CG307 sub-clade characterized by the chromosomally encoded blaSHV-205 and unique accessory genome content. This CG307 sub-clade was detected in various regions of the USA, with genome sequences from 24 additional strains becoming recently available in the National Center for Biotechnology Information (NCBI) SRA database. Collectively, this study underscores the emergence and dissemination of a distinct CG307 sub-clade that is a prevalent cause of 3GC-R K. pneumoniae bacteraemia among cancer patients seen in Houston, TX, and has recently been isolated throughout the USA.}, } @article {pmid38402521, year = {2024}, author = {van Westerhoven, AC and Aguilera-Galvez, C and Nakasato-Tagami, G and Shi-Kunne, X and Martinez de la Parte, E and Chavarro-Carrero, E and Meijer, HJG and Feurtey, A and Maryani, N and Ordóñez, N and Schneiders, H and Nijbroek, K and Wittenberg, AHJ and Hofstede, R and García-Bastidas, F and Sørensen, A and Swennen, R and Drenth, A and Stukenbrock, EH and Kema, GHJ and Seidl, MF}, title = {Segmental duplications drive the evolution of accessory regions in a major crop pathogen.}, journal = {The New phytologist}, volume = {}, number = {}, pages = {}, doi = {10.1111/nph.19604}, pmid = {38402521}, issn = {1469-8137}, support = {AG - 442//Bill and Melinda Gates Foundation/ ; 20 04 04 02//Stichting Dioraphte/ ; }, abstract = {Many pathogens evolved compartmentalized genomes with conserved core and variable accessory regions (ARs) that carry effector genes mediating virulence. The fungal plant pathogen Fusarium oxysporum has such ARs, often spanning entire chromosomes. The presence of specific ARs influences the host range, and horizontal transfer of ARs can modify the pathogenicity of the receiving strain. However, how these ARs evolve in strains that infect the same host remains largely unknown. We defined the pan-genome of 69 diverse F. 
oxysporum strains that cause Fusarium wilt of banana, a significant constraint to global banana production, and analyzed the diversity and evolution of the ARs. Accessory regions in F. oxysporum strains infecting the same banana cultivar are highly diverse, and we could not identify any shared genomic regions and in planta-induced effectors. We demonstrate that segmental duplications drive the evolution of ARs. Furthermore, we show that recent segmental duplications specifically in accessory chromosomes cause the expansion of ARs in F. oxysporum. Taken together, we conclude that extensive recent duplications drive the evolution of ARs in F. oxysporum, which contribute to the evolution of virulence.}, } @article {pmid38399738, year = {2024}, author = {Straková, D and Sánchez-Porro, C and de la Haba, RR and Ventosa, A}, title = {Decoding the Genomic Profile of the Halomicroarcula Genus: Comparative Analysis and Characterization of Two Novel Species.}, journal = {Microorganisms}, volume = {12}, number = {2}, pages = {}, pmid = {38399738}, issn = {2076-2607}, support = {PID2020-118136GB-I00//MCIN/AEI/10.13039/501100011033/ ; P20_01066 and BIO-213//Junta de Andalucía/ ; }, abstract = {The genus Halomicroarcula, classified within the family Haloarculaceae, presently comprises eight haloarchaeal species isolated from diverse saline habitats, such as solar salterns, hypersaline soils, marine salt, and marine algae. Here, a detailed taxogenomic study and comparative genomic analysis of the genus Halomicroarcula was carried out. In addition, two strains, designated S1CR25-12[T] and S3CR25-11[T], that were isolated from hypersaline soils located in the Odiel Saltmarshes in Huelva (Spain) were included in this study. The 16S rRNA and rpoB' gene sequence analyses affiliated the two strains to the genus Halomicroarcula. 
Typically, the species of the genus Halomicroarcula possess multiple heterogeneous copies of the 16S rRNA gene, which can lead to misclassification of the taxa and overestimation of the prokaryotic diversity. In contrast, the application of overall genome relatedness indexes (OGRIs) augments the capacity for the precise taxonomic classification and categorization of prokaryotic organisms. The relatedness indexes of the two new isolates, particularly digital DNA-DNA hybridization (dDDH), orthologous average nucleotide identity (OrthoANI), and average amino acid identity (AAI), confirmed that strains S1CR25-12[T] (= CECT 30620[T] = CCM 9252[T]) and S3CR25-11[T] (= CECT 30621[T] = CCM 9254[T]) constitute two novel species of the genus Halomicroarcula. The names Halomicroarcula saliterrae sp. nov. and Halomicroarcula onubensis sp. nov. are proposed for S1CR25-12[T] and S3CR25-11[T], respectively. Metagenomic fragment recruitment analysis, conducted using seven shotgun metagenomic datasets, revealed that the species belonging to the genus Halomicroarcula were predominantly recruited from hypersaline soils found in the Odiel Saltmarshes and the ponds of salterns with high salt concentrations. This reinforces the understanding of the extreme halophilic characteristics associated with the genus Halomicroarcula. 
Finally, comparing pan-genomes across the twenty Halomicroarcula and Haloarcula species allowed for the identification of commonalities and differences between the species of these two related genera.}, } @article {pmid38399654, year = {2024}, author = {Rhoads, DD and Pummill, J and Alrubaye, AAK}, title = {Molecular Genomic Analyses of Enterococcus cecorum from Sepsis Outbreaks in Broilers.}, journal = {Microorganisms}, volume = {12}, number = {2}, pages = {}, doi = {10.3390/microorganisms12020250}, pmid = {38399654}, issn = {2076-2607}, support = {none//Arkansas Biosciences Institute/ ; }, abstract = {Extensive genomic analyses of Enterococcus cecorum isolates from sepsis outbreaks in broilers suggest a polyphyletic origin, likely arising from core genome mutations rather than gene acquisition. This species is a normal intestinal flora of avian species with particular isolates associated with osteomyelitis. More recently, this species has been associated with sepsis outbreaks affecting broilers during the first 3 weeks post-hatch. Understanding the genetic and management basis of this new phenotype is critical for developing strategies to mitigate this emerging problem. Phylogenomic analyses of 227 genomes suggest that sepsis isolates are polyphyletic and closely related to both commensal and osteomyelitis isolate genomes. Pangenome analyses detect no gene acquisitions that distinguish all the sepsis isolates. Core genome single nucleotide polymorphism analyses have identified a number of mutations, affecting the protein-coding sequences, that are enriched in sepsis isolates. 
The analysis of the protein substitutions supports the mutational origins of sepsis isolates.}, } @article {pmid38397433, year = {2024}, author = {Nedashkovskaya, O and Balabanova, L and Otstavnykh, N and Zhukova, N and Detkova, E and Seitkalieva, A and Bystritskaya, E and Noskova, Y and Tekutyeva, L and Isaeva, M}, title = {In-Depth Genome Characterization and Pan-Genome Analysis of Strain KMM 296, a Producer of Highly Active Alkaline Phosphatase; Proposal for the Reclassification of Cobetia litoralis and Cobetia pacifica as the Later Heterotypic Synonyms of Cobetia amphilecti and Cobetia marina, and Emended Description of the Species Cobetia amphilecti and Cobetia marina.}, journal = {Biomolecules}, volume = {14}, number = {2}, pages = {}, doi = {10.3390/biom14020196}, pmid = {38397433}, issn = {2218-273X}, support = {15.BRK.21.0004 (contract no. 075-15-2021-1052)//the Ministry of Science and Higher Education, Russian Federation/ ; }, abstract = {A strictly aerobic, Gram-stain-negative, rod-shaped, and motile bacterium, designated strain KMM 296, isolated from the coelomic fluid of the mussel Crenomytilus grayanus, was investigated in detail due to its ability to produce a highly active alkaline phosphatase CmAP of the structural family PhoA. A previous taxonomic study allocated the strain to the species Cobetia marina, a member of the family Halomonadaceae of the class Gammaproteobacteria. However, 16S rRNA gene sequencing showed KMM 296's relatedness to Cobetia amphilecti NRIC 0815[T]. The isolate grew with 0.5-19% NaCl at 4-42 °C and hydrolyzed Tweens 20 and 40 and L-tyrosine. The DNA G+C content was 62.5 mol%. The prevalent fatty acids were C18:1 ω7c, C12:0 3-OH, C18:1 ω7c, C12:0, and C17:0 cyclo. The polar lipid profile was characterized by the presence of phosphatidylethanolamine, phosphatidylglycerol, phosphatidic acid, and also an unidentified aminolipid, phospholipid, and a few unidentified lipids. The major respiratory quinone was Q-8. 
According to phylogenomic and chemotaxonomic evidence, and the nearest neighbors, the strain KMM 296 represents a member of the species C. amphilecti. The genome-based analysis of C. amphilecti NRIC 0815[T] and C. litoralis NRIC 0814[T] showed their belonging to a single species. In addition, the high similarity between the C. pacifica NRIC 0813[T] and C. marina LMG 2217[T] genomes suggests their affiliation to one species. Based on the rules of priority, C. litoralis should be reclassified as a later heterotypic synonym of C. amphilecti, and C. pacifica is a later heterotypic synonym of C. marina. The emended descriptions of the species C. amphilecti and C. marina are also proposed.}, } @article {pmid38396752, year = {2024}, author = {Evseev, PV and Shneider, MM and Kolupaeva, LV and Kasimova, AA and Timoshina, OY and Perepelov, AV and Shpirt, AM and Shelenkov, AA and Mikhailova, YV and Suzina, NE and Knirel, YA and Miroshnikov, KA and Popova, AV}, title = {New Obolenskvirus Phages Brutus and Scipio: Biology, Evolution, and Phage-Host Interaction.}, journal = {International journal of molecular sciences}, volume = {25}, number = {4}, pages = {}, doi = {10.3390/ijms25042074}, pmid = {38396752}, issn = {1422-0067}, support = {20-75-10113//Russian Science Foundation/ ; }, abstract = {Two novel virulent phages of the genus Obolenskvirus infecting Acinetobacter baumannii, a significant nosocomial pathogen, have been isolated and studied. Phages Brutus and Scipio were able to infect A. baumannii strains belonging to the K116 and K82 capsular types, respectively. The biological properties and genomic organization of the phages were characterized. Comparative genomic, phylogenetic, and pangenomic analyses were performed to investigate the relationship of Brutus and Scipio to other bacterial viruses and to trace the possible origin and evolutionary history of these phages and other representatives of the genus Obolenskvirus. 
The investigation of enzymatic activity of the tailspike depolymerase encoded in the genome of phage Scipio, the first reported virus infecting A. baumannii of the K82 capsular type, was performed. The study of new representatives of the genus Obolenskvirus and mechanisms of action of depolymerases encoded in their genomes expands knowledge about the diversity of viruses within this taxonomic group and strategies of Obolenskvirus-host bacteria interaction.}, } @article {pmid38396294, year = {2024}, author = {Sepich-Poore, GD and McDonald, D and Kopylova, E and Guccione, C and Zhu, Q and Austin, G and Carpenter, C and Fraraccio, S and Wandro, S and Kosciolek, T and Janssen, S and Metcalf, JL and Song, SJ and Kanbar, J and Miller-Montgomery, S and Heaton, R and Mckay, R and Patel, SP and Swafford, AD and Korem, T and Knight, R}, title = {Robustness of cancer microbiome signals over a broad range of methodological variation.}, journal = {Oncogene}, volume = {}, number = {}, pages = {}, pmid = {38396294}, issn = {1476-5594}, support = {U24 CA248454/CA/NCI NIH HHS/United States ; }, abstract = {In 2020, we identified cancer-specific microbial signals in The Cancer Genome Atlas (TCGA) [1]. Multiple peer-reviewed papers independently verified or extended our findings [2-12]. Given this impact, we carefully considered concerns by Gihawi et al. [13] that batch correction and database contamination with host sequences artificially created the appearance of cancer type-specific microbiomes. (1) We tested batch correction by comparing raw and Voom-SNM-corrected data per-batch, finding predictive equivalence and significantly similar features. We found consistent results with a modern microbiome-specific method (ConQuR [14]), and when restricting to taxa found in an independent, highly-decontaminated cohort. (2) Using Conterminator [15], we found low levels of human contamination in our original databases (~1% of genomes). 
We demonstrated that the increased detection of human reads in Gihawi et al. [13] was due to using a newer human genome reference. (3) We developed Exhaustive, a method twice as sensitive as Conterminator, to clean RefSeq. We comprehensively host-deplete TCGA with many human (pan)genome references. We repeated all analyses with this and the Gihawi et al. [13] pipeline, and found cancer type-specific microbiomes. These extensive re-analyses and updated methods validate our original conclusion that cancer type-specific microbial signatures exist in TCGA, and show they are robust to methodology.}, } @article {pmid38389535, year = {2024}, author = {Patakova, P and Vasylkivska, M and Sedlar, K and Jureckova, K and Bezdicek, M and Lovecka, P and Branska, B and Kastanek, P and Krofta, K}, title = {Whole genome sequencing and characterization of Pantoea agglomerans DBM 3797, endophyte, isolated from fresh hop (Humulus lupulus L.).}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1305338}, pmid = {38389535}, issn = {1664-302X}, abstract = {BACKGROUND: This paper brings new information about the genome and phenotypic characteristics of Pantoea agglomerans strain DBM 3797, isolated from fresh Czech hop (Humulus lupulus) in the Saaz hop-growing region. Although P. agglomerans strains are frequently isolated from different materials, they are not usually thoroughly characterized even if they have versatile metabolism and those isolated from plants may have a considerable potential for application in agriculture as a support culture for plant growth.

METHODS: P. agglomerans DBM 3797 was cultured under aerobic and anaerobic conditions, its metabolites were analyzed by HPLC and it was tested for plant growth promotion abilities, such as phosphate solubilization, siderophore and indole-3-acetic acid production. In addition, genomic DNA was extracted, sequenced and de novo assembly was performed. Further, genome annotation, pan-genome analysis and selected genome analyses, such as CRISPR arrays detection, antibiotic resistance and secondary metabolite genes identification were carried out.

RESULTS AND DISCUSSION: The typical appearance characteristics of the strain include the formation of symplasmata in submerged liquid culture and the formation of pale yellow colonies on agar. The genetic information of the strain (in total 4.8 Mb) is divided between a chromosome and two plasmids. The strain lacks any CRISPR-Cas system but is equipped with four restriction-modification systems. The phenotypic analysis focused on growth under both aerobic and anaerobic conditions, as well as traits associated with plant growth promotion. At both levels (genomic and phenotypic), the production of siderophores, indoleacetic acid-derived growth promoters, gluconic acid, and enzyme activities related to the degradation of complex organic compounds were found. Extracellular gluconic acid production under aerobic conditions (up to 8 g/l) is probably the result of glucose oxidation by the membrane-bound pyrroloquinoline quinone-dependent enzyme glucose dehydrogenase. The strain has a number of properties potentially beneficial to the hop plant and its closest relatives include the strains also isolated from the aerial parts of plants, yet its safety profile needs to be addressed in follow-up research.}, } @article {pmid38389084, year = {2024}, author = {Miao, J and Wei, X and Cao, C and Sun, J and Xu, Y and Zhang, Z and Wang, Q and Pan, Y and Wang, Z}, title = {Pig pangenome graph reveals functional features of non-reference sequences.}, journal = {Journal of animal science and biotechnology}, volume = {15}, number = {1}, pages = {32}, pmid = {38389084}, issn = {1674-9782}, support = {2022YFF1000500//National Key Research and Development Program of China/ ; 31941007//National Natural Science Foundation of China/ ; 2016C02054-2//Zhejiang province agriculture (livestock) varieties breeding Key Technology R&D Program/ ; }, abstract = {BACKGROUND: The reliance on a solitary linear reference genome has imposed a significant constraint on our comprehensive understanding of 
genetic variation in animals. This constraint is particularly pronounced for non-reference sequences (NRSs), which have not been extensively studied.

RESULTS: In this study, we constructed a pig pangenome graph using 21 pig assemblies and identified 23,831 NRSs with a total length of 105 Mb. Our findings revealed that NRSs were more prevalent in breeds exhibiting greater genetic divergence from the reference genome. Furthermore, we observed that NRSs were rarely found within coding sequences, while NRS insertions were enriched in immune-related Gene Ontology terms. Notably, our investigation also unveiled a close association between novel genes and the immune capacity of pigs. We observed substantial differences in terms of frequencies of NRSs between Eastern and Western pigs, and the heat-resistant pigs exhibited a substantial number of NRS insertions in an 11.6 Mb interval on chromosome X. Additionally, we discovered a 665 bp insertion in the fourth intron of the TNFRSF19 gene that may be associated with the ability of heat tolerance in Southern Chinese pigs.

CONCLUSIONS: Our findings demonstrate the potential of a graph genome approach to reveal important functional features of NRSs in pig populations.}, } @article {pmid38388650, year = {2024}, author = {Pena-Fernández, N and Ocejo, M and van der Graaf-van Bloois, L and Lavín, JL and Kortabarria, N and Collantes-Fernández, E and Hurtado, A and Aduriz, G}, title = {Comparative pangenomic analysis of Campylobacter fetus isolated from Spanish bulls and other mammalian species.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {4347}, pmid = {38388650}, issn = {2045-2322}, support = {Pre2018-086113 funded by MCIN/AEI/ 10.13039/501100011033 and by "ESF Investing in your future"//Ministerio de Ciencia e Innovación/ ; }, abstract = {Campylobacter fetus comprises two closely related mammal-associated subspecies: Campylobacter fetus subsp. fetus (Cff) and Campylobacter fetus subsp. venerealis (Cfv). The latter causes bovine genital campylobacteriosis, a sexually-transmitted disease endemic in Spain that results in significant economic losses in the cattle industry. Here, 33 C. fetus Spanish isolates were whole-genome sequenced and compared with 62 publicly available C. fetus genomes from other countries. Genome-based taxonomic identification revealed high concordance with in silico PCR, confirming Spanish isolates as Cff (n = 4), Cfv (n = 9) and Cfv biovar intermedius (Cfvi, n = 20). MLST analysis assigned the Spanish isolates to 6 STs, including three novel: ST-76 and ST-77 for Cfv and ST-78 for Cff. Core genome SNP phylogenetic analysis of the 95 genomes identified multiple clusters, revealing associations at subspecies and biovar level between genomes with the same ST and separating the Cfvi genomes from Spain and other countries. A genome-wide association study identified pqqL as a Cfv-specific gene and a potential candidate for more accurate identification methods. Functionality analysis revealed variations in the accessory genome of C. 
fetus subspecies and biovars that deserve further studies. These results provide valuable information about the regional variants of C. fetus present in Spain and the genetic diversity and predicted functionality of the different subspecies.}, } @article {pmid38385549, year = {2024}, author = {Arizala, D and Arif, M}, title = {Impact of homologous recombination on core genome evolution and host adaptation of Pectobacterium parmentieri.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae032}, pmid = {38385549}, issn = {1759-6653}, abstract = {Homologous recombination is a major force mechanism driving bacterial evolution, host adaptability and acquisition of novel virulence traits. Pectobacterium parmentieri is a plant bacterial pathogen distributed worldwide, primarily affecting potatoes, by causing soft rot and blackleg diseases. The goal of this investigation was to understand the impact of homologous recombination on the genomic evolution of P. parmentieri. Analysis of P. parmentieri genomes using Roary revealed a dynamic pan-genome with 3,742 core genes and over 55% accessory genome variability. Bayesian population structure analysis identified seven lineages, indicating species heterogeneity. ClonalFrameML analysis displayed 5,125 recombination events, with the lineage 4 exhibiting the highest events. fastGEAR analysis identified 486 ancestral and 941 recent recombination events ranging 43 bp - 119 kb and 36 bp - 13.96 kb, respectively, suggesting ongoing adaptation. Notably, 11% (412 genes) of the core genome underwent recent recombination, with lineage 1 as the main donor. The prevalence of recent recombination (double compared to ancient) events implies continuous adaptation, possibly driven by global potato trade. 
Recombination events were found in genes involved in vital cellular processes (DNA replication, DNA repair, RNA processing, homeostasis, and metabolism), pathogenicity determinants (type secretion systems, cell-wall degrading enzymes, iron scavengers, lipopolysaccharides, flagellum, etc.), antimicrobial compounds (phenazine and colicin) and even CRISPR-Cas genes. Overall, these results emphasize the potential role of homologous recombination in P. parmentieri's evolutionary dynamics, influencing host colonization, pathogenicity, adaptive immunity, and ecological fitness.}, } @article {pmid38385476, year = {2024}, author = {Tariq, DE}, title = {Pangenomic analyses of tuberculosis strains to identify resistomes using computational approaches.}, journal = {JPMA. The Journal of the Pakistan Medical Association}, volume = {74}, number = {1 (Supple-2)}, pages = {S74-S78}, doi = {10.47391/JPMA-DUHS-S15}, pmid = {38385476}, issn = {0030-9982}, abstract = {OBJECTIVE: To locate resistomes in tuberculosis strains, to determine the severity of drug resistance, and to infer its implications with respect to high tuberculosis prevalence in a Third World setting.

METHODS: The pangenomic study was conducted from October 2022 to January 2023 in Sir Syed University of Engineering and Technology, Karachi, and comprised 2012-22 data on multiple sequence alignment to assess the genetic evolution of tuberculosis strains. Antibiotic resistance drug classes were identified using the Canadian Antibiotic Resistance Database, which entailed multidrug-resistant and extremely drug-resistant strains. Also, GenBank was used for tuberculosis genome FASTA (fast-all; nucleotide and protein sequence representation) files, prediction of resistome sequences on the basis of Canadian Antibiotic Resistance Database, and multiple sequence alignment was done in Mauve.

RESULTS: Evolutionarily, the 6 strains identified were structurally similar with polymorphisms in their core chromosomal regions. Their resistome genes showed perfect hits for isoniazid, rifamycin, cephalosporin, fluoroquinolone, aminoglycosides, penem, penam and cephamycin.

CONCLUSION: Drugs discovered in antibiotic resistance genes are now less effective in treatment, and have the potential to develop into more dangerous bacteria, if not monitored. For treatment, staying long durations in hospitals for quality healthcare and supervision in third world countries is unaffordable.}, } @article {pmid38379925, year = {2024}, author = {Turco, S and Russo, S and Pietrucci, D and Filippi, A and Milanesi, M and Luzzago, C and Garbarino, C and Palladini, G and Chillemi, G and Ricchi, M}, title = {High clonality of Mycobacterium avium subsp. paratuberculosis field isolates from red deer revealed by two different methodological approaches of comparative genomic analysis.}, journal = {Frontiers in veterinary science}, volume = {11}, number = {}, pages = {1301667}, pmid = {38379925}, issn = {2297-1769}, abstract = {Mycobacterium avium subsp. paratuberculosis (MAP) is the aetiological agent of paratuberculosis (Johne's disease) in both domestic and wild ruminants. In the present study, using a whole-genome sequence (WGS) approach, we investigated the genetic diversity of 15 Mycobacterium avium field strains isolated in the last 10 years from red deer inhabiting the Stelvio National Park and affected by paratuberculosis. Combining de novo assembly and a reference-based method, followed by a pangenome analysis, we highlight a very close relationship among 13 MAP field isolates, suggesting that a single infecting event occurred in this population. Moreover, two isolates have been classified as Mycobacterium avium subsp. hominissuis, distinct from the other MAPs under comparison but close to each other. This is the first time that this subspecies has been found in Italy in samples without evident epidemiological correlations, having been isolated in two different locations of the Stelvio National Park and in different years. 
Our study highlights the importance of a multidisciplinary approach incorporating molecular epidemiology and ecology into traditional infectious disease knowledge in order to investigate the nature of infectious disease in wildlife populations.}, } @article {pmid38378816, year = {2024}, author = {Schreiber, M and Jayakodi, M and Stein, N and Mascher, M}, title = {Plant pangenomes for crop improvement, biodiversity and evolution.}, journal = {Nature reviews. Genetics}, volume = {}, number = {}, pages = {}, pmid = {38378816}, issn = {1471-0064}, abstract = {Plant genome sequences catalogue genes and the genetic elements that regulate their expression. Such inventories further research aims as diverse as mapping the molecular basis of trait diversity in domesticated plants or inquiries into the origin of evolutionary innovations in flowering plants millions of years ago. The transformative technological progress of DNA sequencing in the past two decades has enabled researchers to sequence ever more genomes with greater ease. Pangenomes - complete sequences of multiple individuals of a species or higher taxonomic unit - have now entered the geneticists' toolkit. The genomes of crop plants and their wild relatives are being studied with translational applications in breeding in mind. 
But pangenomes are applicable also in ecological and evolutionary studies, as they help classify and monitor biodiversity across the tree of life, deepen our understanding of how plant species diverged and show how plants adapt to changing environments or new selection pressures exerted by human beings.}, } @article {pmid38376942, year = {2024}, author = {Truong, TC and Park, H and Kim, JH and Tran, VT and Kim, W}, title = {The evolutionary phylodynamics of human parechovirus A type 3 reveal multiple recombination events in South Korea.}, journal = {Journal of medical virology}, volume = {96}, number = {2}, pages = {e29477}, doi = {10.1002/jmv.29477}, pmid = {38376942}, issn = {1096-9071}, support = {NRF-2021R1C1C2003223//National Research Foundation of Korea/ ; NRF-2022R1A2C2012209//National Research Foundation of Korea/ ; }, abstract = {Human parechovirus A (HPeV-A) is a causative agent of respiratory and gastrointestinal illnesses, acute flaccid paralysis encephalitis, meningitis, and neonatal sepsis. To clarify the characteristics of HPeV-A infection in children, 391 fecal specimens were collected from January 2014 to October 2015 from patients with acute gastroenteritis in Seoul, South Korea. Of these, 221/391 (56.5%) HPeV-A positive samples were found in children less than 2 years old. Three HPeV-A genotypes HPeV-A1 (117/221; 52.94%), HPeV-A3 (100/221; 45.25%), and HPeV-A6 (4/221; 1.81%) were detected, among which HPeV-A3 was predominant with the highest recorded value of 58.6% in 2015. Moreover, recombination events in the Korean HPeV-A3 strains were detected. 
Phylogenetic analysis revealed that the capsid-encoding regions and noncapsid gene 2A of the four Korean HPeV-A3 strains are closely related to the HPeV-A3 strains isolated in Canada in 2007 (Can82853-01), Japan in 2008 (A308/99), and Taiwan in 2011 (TW-03067-2011) while noncapsid genes P2 (2B-2C) and P3 (3A-3D) are closely related to those of HPeV-A1 strains BNI-788St (Germany in 2008) and TW-71594-2010 (Taiwan in 2010). This first report on the whole-genome analysis of HPeV-A3 in Korea provides insight into the evolving status and pathogenesis of HPeVs in children.}, } @article {pmid38376382, year = {2024}, author = {Cooper, HB and Vezina, B and Hawkey, J and Passet, V and López-Fernández, S and Monk, JM and Brisse, S and Holt, KE and Wyres, KL}, title = {A validated pangenome-scale metabolic model for the Klebsiella pneumoniae species complex.}, journal = {Microbial genomics}, volume = {10}, number = {2}, pages = {}, doi = {10.1099/mgen.0.001206}, pmid = {38376382}, issn = {2057-5858}, abstract = {The Klebsiella pneumoniae species complex (KpSC) is a major source of nosocomial infections globally with high rates of resistance to antimicrobials. Consequently, there is growing interest in understanding virulence factors and their association with cellular metabolic processes for developing novel anti-KpSC therapeutics. Phenotypic assays have revealed metabolic diversity within the KpSC, but metabolism research has been neglected due to experiments being difficult and cost-intensive. Genome-scale metabolic models (GSMMs) represent a rapid and scalable in silico approach for exploring metabolic diversity, which compile genomic and biochemical data to reconstruct the metabolic network of an organism. Here we use a diverse collection of 507 KpSC isolates, including representatives of globally distributed clinically relevant lineages, to construct the most comprehensive KpSC pan-metabolic model to date, KpSC pan v2. 
Candidate metabolic reactions were identified using gene orthology to known metabolic genes, prior to manual curation via extensive literature and database searches. The final model comprised a total of 3550 reactions, 2403 genes and can simulate growth on 360 unique substrates. We used KpSC pan v2 as a reference to derive strain-specific GSMMs for all 507 KpSC isolates, and compared these to GSMMs generated using a prior KpSC pan-reference (KpSC pan v1) and two single-strain references. We show that KpSC pan v2 includes a greater proportion of accessory reactions (8.8 %) than KpSC pan v1 (2.5 %). GSMMs derived from KpSC pan v2 also generate more accurate growth predictions, with high median accuracies of 95.4 % (aerobic, n=37 isolates) and 78.8 % (anaerobic, n=36 isolates) for 124 matched carbon substrates. KpSC pan v2 is freely available at https://github.com/kelwyres/KpSC-pan-metabolic-model, representing a valuable resource for the scientific community, both as a source of curated metabolic information and as a reference to derive accurate strain-specific GSMMs. The latter can be used to investigate the relationship between KpSC metabolism and traits of interest, such as reservoirs, epidemiology, drug resistance or virulence, and ultimately to inform novel KpSC control strategies.}, } @article {pmid38376357, year = {2024}, author = {Benning, S and Pritsch, K and Radl, V and Siani, R and Wang, Z and Schloter, M}, title = {(Pan)genomic analysis of two Rhodococcus isolates and their role in phenolic compound degradation.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0378323}, doi = {10.1128/spectrum.03783-23}, pmid = {38376357}, issn = {2165-0497}, abstract = {The genus Rhodococcus is recognized for its potential to degrade a large range of aromatic substances, including plant-derived phenolic compounds. 
We used comparative genomics in the context of the broader Rhodococcus pan-genome to study genomic traits of two newly described Rhodococcus strains (type-strain Rhodococcus pseudokoreensis R79[T] and Rhodococcus koreensis R85) isolated from apple rhizosphere. Of particular interest was their ability to degrade phenolic compounds as part of an integrated approach to treat apple replant disease (ARD) syndrome. The pan-genome of the genus Rhodococcus based on 109 high-quality genomes was open with a small core (1.3%) consisting of genes assigned to basic cell functioning. The range of genome sizes in Rhodococcus was high, from 3.7 to 10.9 Mbp. Genomes from host-associated strains were generally smaller compared to environmental isolates which were characterized by exceptionally large genome sizes. Due to large genomic differences, we propose the reclassification of distinct groups of rhodococci like the Rhodococcus equi cluster to new genera. Taxonomic species affiliation was the most important factor in predicting genetic content and clustering of the genomes. Additionally, we found genes that discriminated between the strains based on habitat. All members of the genus Rhodococcus had at least one gene involved in the pathway for the degradation of benzoate, while biphenyl degradation was mainly restricted to strains in close phylogenetic relationships with our isolates. The ~40% of genes still unclassified in larger Rhodococcus genomes, particularly those of environmental isolates, need more research to explore the metabolic potential of this genus. IMPORTANCE: Rhodococcus is a diverse, metabolically powerful genus, with high potential to adapt to different habitats due to the linear plasmids and large genome sizes. The analysis of its pan-genome allowed us to separate host-associated from environmental strains, supporting taxonomic reclassification. 
It was shown which genes contribute to the differentiation of the genomes based on habitat, which can possibly be used for targeted isolation and screening for desired traits. With respect to apple replant disease (ARD), our isolates showed genome traits that suggest potential for application in reducing plant-derived phenolic substances in soil, which makes them good candidates for further testing against ARD.}, } @article {pmid38375235, year = {2024}, author = {Lagerstrom, KM and Scales, NC and Hadly, EA}, title = {Impressive pan-genomic diversity of E. coli from a wild animal community near urban development reflects human impacts.}, journal = {iScience}, volume = {27}, number = {3}, pages = {109072}, pmid = {38375235}, issn = {2589-0042}, abstract = {Human and domesticated animal waste infiltrates global freshwater, terrestrial, and marine environments, widely disseminating fecal microbes, antibiotics, and other chemical pollutants. Emerging evidence suggests that guts of wild animals are being invaded by our microbes, including Escherichia coli, which face anthropogenic selective pressures to gain antimicrobial resistance (AMR) and increase virulence. However, wild animal sources remain starkly under-represented among genomic sequence repositories. We sequenced whole genomes of 145 E. coli isolates from 55 wild and 13 domestic animal fecal samples, averaging 2 (ranging 1-7) isolates per sample, on a preserve imbedded in a human-dominated landscape in California Bay Area, USA, to assess AMR, virulence, and pan-genomic diversity. With single nucleotide polymorphism analyses we predict potential transmission routes. We illustrate the usefulness of E. 
coli to aid our understanding of and ability to surveil the emergence of zoonotic pathogens created by the mixing of human and wild bacteria in the environment.}, } @article {pmid38370750, year = {2024}, author = {Bolognini, D and Halgren, A and Lou, RN and Raveane, A and Rocha, JL and Guarracino, A and Soranzo, N and Chin, J and Garrison, E and Sudmant, PH}, title = {Global diversity, recurrent evolution, and recent selection on amylase structural haplotypes in humans.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.07.579378}, pmid = {38370750}, abstract = {The adoption of agriculture, first documented ∼12,000 years ago in the Fertile Crescent, triggered a rapid shift toward starch-rich diets in human populations. Amylase genes facilitate starch digestion and increased salivary amylase copy number has been observed in some modern human populations with high starch intake, though evidence of recent selection is lacking. Here, using 52 long-read diploid assemblies and short read data from ∼5,600 contemporary and ancient humans, we resolve the diversity, evolutionary history, and selective impact of structural variation at the amylase locus. We find that both salivary and pancreatic amylase genes have higher copy numbers in populations with agricultural subsistence compared to fishing, hunting, and pastoral groups. We identify 28 distinct amylase structural architectures and demonstrate that identical structures have arisen independently multiple times throughout recent human history. Using a pangenome graph-based approach to infer structural haplotypes across thousands of humans, we identify extensively duplicated haplotypes present at higher frequencies in modern agricultural populations. 
Leveraging 534 ancient human genomes we find that duplication-containing haplotypes have increased in frequency more than seven-fold over the last 12,000 years providing evidence for recent selection in Eurasians at this locus comparable in magnitude to that at lactase. Together, our study highlights the strong impact of the agricultural revolution on human genomes and the importance of long-read sequencing in identifying signatures of selection at structurally complex loci.}, } @article {pmid38370713, year = {2024}, author = {Lypaczewski, P and Chac, D and Dunmire, CN and Tandoc, KM and Chowdhury, F and Khan, AI and Bhuiyan, T and Harris, JB and LaRocque, RC and Calderwood, SB and Ryan, ET and Qadri, F and Shapiro, BJ and Weil, AA}, title = {Diversity of Vibrio cholerae O1 through the human gastrointestinal tract during cholera.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.02.08.579476}, pmid = {38370713}, abstract = {UNLABELLED: Vibrio cholerae O1 causes the diarrheal disease cholera, and the small intestine is the site of active infection. During cholera, cholera toxin is secreted from V. cholerae and induces a massive fluid influx into the small intestine, which causes vomiting and diarrhea. Typically, V. cholerae genomes are sequenced from bacteria passed in stool, but rarely from vomit, a fluid that may more closely represent the site of active infection. We hypothesized that the V. cholerae O1 population bottlenecks along the gastrointestinal tract would result in reduced genetic variation in stool compared to vomit. To test this, we sequenced V. cholerae genomes from ten cholera patients with paired vomit and stool samples. Genetic diversity was low in both vomit and stool, consistent with a single infecting population rather than co-infection with divergent V. cholerae O1 lineages. 
The number of single nucleotide variants decreased between vomit and stool in four patients, increased in two, and remained unchanged in four. The number of genes encoded in the V. cholerae genome decreased between vomit and stool in eight patients and increased in two. Pangenome analysis of assembled short-read sequencing demonstrated that the toxin-coregulated pilus operon more frequently contained deletions in genomes from vomit compared to stool. However, these deletions were not detected by PCR or long-read sequencing, indicating that interpreting gene presence or absence patterns from short-read data alone may be incomplete. Overall, we found that V. cholerae O1 isolated from stool is genetically similar to V. cholerae recovered from the upper intestinal tract.

IMPORTANCE: Vibrio cholerae O1, the bacterium that causes cholera, is ingested in contaminated food or water and then colonizes the upper small intestine and is excreted in stool. Shed V. cholerae genomes are usually studied, but V. cholerae isolated from vomit may be more representative of where V. cholerae colonizes in the upper intestinal epithelium. V. cholerae may experience bottlenecks, or large reductions in bacterial population sizes or genetic diversity, as it passes through the gut. Passage through the gut may select for distinct V. cholerae mutants that are adapted for survival and gut colonization. We did not find strong evidence for such adaptive mutations, and instead observed that passage through the gut results in modest reductions in V. cholerae genetic diversity, and only in some patients. These results fill a gap in our understanding of the V. cholerae life cycle, transmission, and evolution.}, } @article {pmid38370577, year = {2023}, author = {Yuan, C and An, T and Li, X and Zou, J and Lin, Z and Gu, J and Hu, R and Fang, Z}, title = {Genomic analysis of Ralstonia pickettii reveals the genetic features for potential pathogenicity and adaptive evolution in drinking water.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1272636}, pmid = {38370577}, issn = {1664-302X}, abstract = {Ralstonia pickettii, the most critical clinical pathogen of the genus Ralstonia, has been identified as a causative agent of numerous harmful infections. Additionally, Ralstonia pickettii demonstrates adaptability to extreme environmental conditions, such as those found in drinking water. In this study, we conducted a comprehensive genomic analysis to investigate the genomic characteristics related to potential pathogenicity and adaptive evolution in drinking water environments of Ralstonia pickettii. 
Through phylogenetic analysis and population genetic analysis, we divided Ralstonia pickettii into five Groups, two of which were associated with drinking water environments. The open pan-genome with a large and flexible gene repertoire indicated a high genetic plasticity. Significant differences in functional enrichment were observed between the core- and pan-genome of different groups. Diverse mobile genetic elements (MGEs), extensive genomic rearrangements, and horizontal gene transfer (HGT) events played a crucial role in generating genetic diversity. In drinking water environments, Ralstonia pickettii exhibited strong adaptability, and the acquisition of specific adaptive genes was potentially facilitated by genomic islands (GIs) and HGT. Furthermore, environmental pressures drove the adaptive evolution of Ralstonia pickettii, leading to the accumulation of unique mutations in key genes. These mutations may have a significant impact on various physiological functions, particularly carbon metabolism and energy metabolism. The presence of virulence-related elements associated with macromolecular secretion systems, virulence factors, and antimicrobial resistance indicated the potential pathogenicity of Ralstonia pickettii, making it capable of causing multiple nosocomial infections. 
This study provides comprehensive insights into the potential pathogenicity and adaptive evolution of Ralstonia pickettii in drinking water environments from a genomic perspective.}, } @article {pmid38365240, year = {2024}, author = {Shen, L and Liu, Y and Chen, L and Lei, T and Ren, P and Ji, M and Song, W and Lin, H and Su, W and Wang, S and Rooman, M and Pucci, F}, title = {Genomic basis of environmental adaptation in the widespread poly-extremophilic Exiguobacterium group.}, journal = {The ISME journal}, volume = {18}, number = {1}, pages = {}, doi = {10.1093/ismejo/wrad020}, pmid = {38365240}, issn = {1751-7370}, support = {U21A20176//National Natural Science Foundation of China/ ; 2019QZKK0503//Second Tibetan Plateau Scientific Expedition and Research/ ; 92251304//National Natural Science Foundation of China/ ; swzy202008//Open Project Fund of Anhui Provincial Key Laboratory of Protection and Utilization of Important Biological Resources/ ; 2022AH010012//Anhui Provincial Engineering Research Centre for Molecular Detection and Diagnostics/ ; }, abstract = {Delineating cohesive ecological units and determining the genetic basis for their environmental adaptation are among the most important objectives in microbiology. In the last decade, many studies have been devoted to characterizing the genetic diversity in microbial populations to address these issues. However, the impact of extreme environmental conditions, such as temperature and salinity, on microbial ecology and evolution remains unclear so far. In order to better understand the mechanisms of adaptation, we studied the (pan)genome of Exiguobacterium, a poly-extremophile bacterium able to grow in a wide range of environments, from permafrost to hot springs. To have the genome for all known Exiguobacterium type strains, we first sequenced those that were not yet available. 
Using a reverse-ecology approach, we showed how the integration of phylogenomic information, genomic features, gene and pathway enrichment data, regulatory element analyses, protein amino acid composition, and protein structure analyses of the entire Exiguobacterium pangenome allows to sharply delineate ecological units consisting of mesophilic, psychrophilic, halophilic-mesophilic, and halophilic-thermophilic ecotypes. This in-depth study clarified the genetic basis of the defined ecotypes and identified some key mechanisms driving the environmental adaptation to extreme environments. Our study points the way to organizing the vast microbial diversity into meaningful ecologically units, which, in turn, provides insight into how microbial communities adapt and respond to different environmental conditions in a changing world.}, } @article {pmid38364871, year = {2024}, author = {Wu, Z and Li, T and Jiang, Z and Zheng, J and Gu, Y and Liu, Y and Liu, Y and Xie, Z}, title = {Human pangenome analysis of sequences missing from the reference genome reveals their widespread evolutionary, phenotypic, and functional roles.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae086}, pmid = {38364871}, issn = {1362-4962}, support = {2019YFA0904400//National Key Research and Development Program of China/ ; 202201020336//Science and Technology Program of Guangzhou, China/ ; }, abstract = {Nonreference sequences (NRSs) are DNA sequences present in global populations but absent in the current human reference genome. However, the extent and functional significance of NRSs in the human genomes and populations remains unclear. Here, we de novo assembled 539 genomes from five genetically divergent human populations using long-read sequencing technology, resulting in the identification of 5.1 million NRSs. These were merged into 45284 unique NRSs, with 29.7% being novel discoveries. 
Among these NRSs, 38.7% were common across the five populations, and 35.6% were population specific. The use of a graph-based pangenome approach allowed for the detection of 565 transcript expression quantitative trait loci on NRSs, with 426 of these being novel findings. Moreover, 26 NRS candidates displayed evidence of adaptive selection within human populations. Genes situated in close proximity to or intersecting with these candidates may be associated with metabolism and type 2 diabetes. Genome-wide association studies revealed 14 NRSs to be significantly associated with eight phenotypes. Additionally, 154 NRSs were found to be in strong linkage disequilibrium with 258 phenotype-associated SNPs in the GWAS catalogue. Our work expands the understanding of human NRSs and provides novel insights into their functions, facilitating evolutionary and biomedical researches.}, } @article {pmid38361606, year = {2024}, author = {Bonnie, JK and Ahmed, OY and Langmead, B}, title = {DandD: Efficient measurement of sequence growth and similarity.}, journal = {iScience}, volume = {27}, number = {3}, pages = {109054}, doi = {10.1016/j.isci.2024.109054}, pmid = {38361606}, issn = {2589-0042}, abstract = {Genome assembly databases are growing rapidly. The redundancy of sequence content between a new assembly and previous ones is neither conceptually nor algorithmically easy to measure. We introduce pertinent methods and DandD, a tool addressing how much new sequence is gained when a sequence collection grows. DandD can describe how much structural variation is discovered in each new human genome assembly and when discoveries will level off in the future. DandD uses a measure called δ ("delta"), developed initially for data compression and chiefly dependent on k-mer counts. DandD rapidly estimates δ using genomic sketches. We propose δ as an alternative to k-mer-specific cardinalities when computing the Jaccard coefficient, thereby avoiding the pitfalls of a poor choice of k. 
We demonstrate the utility of DandD's functions for estimating δ, characterizing the rate of pangenome growth, and computing all-pairs similarities using k-independent Jaccard.}, } @article {pmid38356529, year = {2024}, author = {Zhou, L and Liu, D and Zhu, Y and Zhang, Z and Chen, S and Zhao, G and Zheng, H}, title = {Advance typing of Vibrio parahaemolyticus through the mtlA and aer gene: A high-resolution, cost-effective approach.}, journal = {Heliyon}, volume = {10}, number = {3}, pages = {e25642}, pmid = {38356529}, issn = {2405-8440}, abstract = {Vibrio parahaemolyticus is a significant cause of foodborne illness, and its incidence worldwide is on the rise. It is thus imperative to develop a straightforward and efficient method for typing strains of this pathogen. In this study, we conducted a pangenome analysis of 75 complete genomes of V. parahaemolyticus and identified the core gene mtlA with the highest degree of variation, which distinguished 44 strains and outperformed traditional seven-gene-based MLST when combined with aer, another core gene with high degree of variation. The mtlA gene had higher resolution to type strains with a close relationship compared to the traditional MLST genes in the phylogenetic tree built by core genomes. Strong positive selection was also detected in the gene mtlA (ω > 1), representing adaptive and evolution in response to the environment. Therefore, the panel of gene mtlA and aer may serve as a tool for the typing of V. 
parahaemolyticus, potentially contributing to the prevention and control of this foodborne disease.}, } @article {pmid38355307, year = {2024}, author = {Leonard, AS and Mapel, XM and Pausch, H}, title = {Pangenome genotyped structural variation improves molecular phenotype mapping in cattle.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.278267.123}, pmid = {38355307}, issn = {1549-5469}, abstract = {Expression and splicing quantitative trait loci (e/sQTL) are large contributors to phenotypic variability. Achieving sufficient statistical power for e/sQTL mapping requires large cohorts with both genotypes and molecular phenotypes, and so the genomic variation is often called from short-read alignments which are unable to comprehensively resolve structural variation. Here we build a pangenome from 16 HiFi haplotype-resolved assemblies to identify small and structural variation and genotype them with PanGenie in 307 short-read samples. We find high (>90%) concordance of PanGenie-genotyped and DeepVariant-called small variation, and confidently genotype close to 21M small and 43k structural variants in the larger population. We validate 85% of these structural variants (with MAF>0.1) directly with a subset of 25 short-read samples that also have medium coverage HiFi reads. We then conduct e/sQTL mapping with this comprehensive variant set in a subset of 117 cattle that have testis transcriptome data and find 92 structural variants as causal candidates for eQTL and 73 for sQTL. 
We find that roughly half of top associated structural variants affecting expression or splicing are transposable elements, such as SV-eQTLs for STN1 and MYH7 and SV-sQTLs for CEP89 and ASAH2. Extensive linkage disequilibrium between small and structural variation results in only 28 additional eQTL and 17 sQTL discovered when including SVs, although many top associated SVs are compelling candidates.}, } @article {pmid38352482, year = {2024}, author = {Raghuram, V and Petit, RA and Karol, Z and Mehta, R and Weissman, DB and Read, TD}, title = {Average Nucleotide Identity based Staphylococcus aureus strain grouping allows identification of strain-specific genes in the pangenome.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.01.29.577756}, pmid = {38352482}, abstract = {UNLABELLED: Staphylococcus aureus causes both hospital and community acquired infections in humans worldwide. Due to the high incidence of infection S. aureus is also one of the most sampled and sequenced pathogens today, providing an outstanding resource to understand variation at the bacterial subspecies level. We processed and downsampled 83,383 public S. aureus Illumina whole genome shotgun sequences and 1,263 complete genomes to produce 7,954 representative substrains. Pairwise comparison of core gene Average Nucleotide Identity (ANI) revealed a natural boundary of 99.5% that could be used to define 145 distinct strains within the species. We found that intermediate frequency genes in the pangenome (present in 10-95% of genomes) could be divided into those closely linked to strain background ("strain-concentrated") and those highly variable within strains ("strain-diffuse"). Non-core genes had different patterns of chromosome location; notably, strain-diffuse associated with prophages, strain-concentrated with the vSaβ genome island and rare genes (<10% frequency) concentrated near the origin of replication. 
Antibiotic genes were enriched in the strain-diffuse class, while virulence genes were distributed between strain-diffuse, strain-concentrated, core and rare classes. This study shows how different patterns of gene movement help create strains as distinct subspecies entities and provide insight into the diverse histories of important S. aureus functions.

IMPORTANCE: We analyzed the genomic diversity of Staphylococcus aureus, a globally prevalent bacterial species that causes serious infections in humans. Our goal was to build a genetic picture of the different strains of S. aureus and which genes may be associated with them. We used a large public dataset (>84,000 genomes) that was re-processed and subsampled to remove redundancy. We found that individual genomes could be grouped into strains by sharing > 99.5% identical nucleotide sequence of the core part of their genome. We also showed that a portion of genes that are present in intermediate frequency in the species are strongly associated with some strains but completely absent from others, suggesting a role in strain-specificity. This work lays the foundation for understanding individual gene histories of the S. aureus species and also outlines strategies for processing large bacterial genomic datasets.}, } @article {pmid38351383, year = {2024}, author = {Li, X and Wang, Y and Cai, C and Ji, J and Han, F and Zhang, L and Chen, S and Zhang, L and Yang, Y and Tang, Q and Bucher, J and Wang, X and Yang, L and Zhuang, M and Zhang, K and Lv, H and Bonnema, G and Zhang, Y and Cheng, F}, title = {Large-scale gene expression alterations introduced by structural variation drive morphotype diversification in Brassica oleracea.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {38351383}, issn = {1546-1718}, support = {31972411//National Natural Science Foundation of China (National Science Foundation of China)/ ; 31722048//National Natural Science Foundation of China (National Science Foundation of China)/ ; 32172578//National Natural Science Foundation of China (National Science Foundation of China)/ ; 201809110159//China Scholarship Council (CSC)/ ; }, abstract = {Brassica oleracea, globally cultivated for its vegetable crops, consists of very diverse morphotypes, characterized by specialized enlarged organs as harvested products. 
This makes B. oleracea an ideal model for studying rapid evolution and domestication. We constructed a B. oleracea pan-genome from 27 high-quality genomes representing all morphotypes and their wild relatives. We identified structural variations (SVs) among these genomes and characterized these in 704 B. oleracea accessions using graph-based genome tools. We show that SVs exert bidirectional effects on the expression of numerous genes, either suppressing through DNA methylation or promoting probably by harboring transcription factor-binding elements. The following examples illustrate the role of SVs modulating gene expression: SVs promoting BoPNY and suppressing BoCKX3 in cauliflower/broccoli, suppressing BoKAN1 and BoACS4 in cabbage and promoting BoMYBtf in ornamental kale. These results provide solid evidence for the role of SVs as dosage regulators of gene expression, driving B. oleracea domestication and diversification.}, } @article {pmid38346372, year = {2024}, author = {Chen, Y and Li, X and Liu, Z and Hu, M and Ma, J and Luo, Y and Zhang, Q and Li, L and Zhao, X and Zhao, M and Liu, W and Liu, Y}, title = {Genomic analysis and experimental pathogenic characterization of Riemerella anatipestifer isolates from chickens in China.}, journal = {Poultry science}, volume = {103}, number = {4}, pages = {103497}, doi = {10.1016/j.psj.2024.103497}, pmid = {38346372}, issn = {1525-3171}, abstract = {Waterfowl have a high likelihood of being infected with Riemerella anatipestifer. Although the pathogen is found in domestic ducks, turkeys, geese, and wild birds, there is little information available about the consequences of infection during egg laying and hatching in chickens. Here, we present the first report of a novel sequence type of R. anatipestifer S63 isolated from chickens in China. On the basis of pan-genome analysis, we showed S63's genome occupies a distinct branch with other R. anatipestifer isolates from other hosts. 
Galleria mellonella larval tests indicated that S63 is less virulent than R. anatipestifer Ra36 isolated from ducks. Ducks and hens are susceptible to S63 infection. There is no mortality rate for chickens or ducks, but adult chickens experience neurological symptoms that reduce egg production and hatching rates. In chickens, S63 might be passed vertically from parents to offspring, resulting in "jelly-like" lifeless embryos. Using quantitative PCR, S63 was detected in the brain, liver, reproductive organs, and embryos. As far as we know, this is the first report of R. anatipestifer in hens, a disease that can reduce egg productivity, lower hatching rates, and produce jelly-like lifeless embryos, and the first report to raise the possibility that hens can be infected by roosters via semen.}, } @article {pmid38339052, year = {2024}, author = {Zhang, T and Chen, X and Yan, W and Li, M and Huang, W and Liu, Q and Li, Y and Guo, C and Shu, Y}, title = {Comparative Analysis of Chloroplast Pan-Genomes and Transcriptomics Reveals Cold Adaptation in Medicago sativa.}, journal = {International journal of molecular sciences}, volume = {25}, number = {3}, pages = {}, doi = {10.3390/ijms25031776}, pmid = {38339052}, issn = {1422-0067}, support = {LH2022C050//Natural Science Foundation of Heilongjiang Province/ ; HSDSSCX2023-42//the Innovative Project for Postgraduate Students of Harbin Normal University/ ; FKL-202203//the Open Fund of Yunnan Province Flower Breeding Key Laboratory/ ; 202301BD070001-208//Agriculture Joint Special Project of Science and Technology Plan Project of Yunnan Science and Technology Department/ ; 530000210000000013742//the Green Food Brand Build a Special Project (Floriculture) supported by Yunnan Provincial Finance Department/ ; U21A20182//the Natural and Science Foundation of China/ ; Qian Liu//Construction of Tengchong Rural Revitalization Technological Innovation County/ ; }, abstract = {Alfalfa (Medicago sativa) is a perennial forage legume that 
is widely distributed all over the world; therefore, it has an extremely complex genetic background. Though population structure and phylogenetic studies have been conducted on a large group of alfalfa nuclear genomes, information about the chloroplast genomes is still lacking. Chloroplast genomes are generally considered to be conservative and play an important role in population diversity analysis and species adaptation in plants. Here, 231 complete alfalfa chloroplast genomes were successfully assembled from 359 alfalfa resequencing data, on the basis of which the alfalfa chloroplast pan-genome was constructed. We investigated the genetic variations of the alfalfa chloroplast genome through comparative genomic, genetic diversity, phylogenetic, population genetic structure, and haplotype analysis. Meanwhile, the expression of alfalfa chloroplast genes under cold stress was explored through transcriptome analysis. As a result, chloroplast genomes of 231 alfalfa lack an IR region, and the size of the chloroplast genome ranges from 125,192 bp to 126,105 bp. Using population structure, haplotypes, and construction of a phylogenetic tree, it was found that alfalfa populations could be divided into four groups, and multiple highly variable regions were found in the alfalfa chloroplast genome. Transcriptome analysis showed that tRNA genes were significantly up-regulated in the cold-sensitive varieties, while rps7, rpl32, and ndhB were down-regulated, and the editing efficiency of ycf1, ycf2, and ndhF was decreased in the cold-tolerant varieties, which may be due to the fact that chloroplasts store nutrients through photosynthesis to resist cold. 
The huge number of genetic variants in this study provide powerful resources for molecular markers.}, } @article {pmid38337024, year = {2024}, author = {Andorf, CM and Haley, OC and Hayford, RK and Portwood, JL and Harding, S and Sen, S and Cannon, EK and Gardiner, JM and Kim, HS and Woodhouse, MR}, title = {PanEffect: a pan-genome visualization tool for variant effects in maize.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btae073}, pmid = {38337024}, issn = {1367-4811}, abstract = {UNLABELLED: Understanding the effects of genetic variants is crucial for accurately predicting traits and functional outcomes. Recent approaches have utilized artificial intelligence and protein language models to score all possible missense variant effects at the proteome level for a single genome, but a reliable tool is needed to explore these effects at the pan-genome level. To address this gap, we introduce a new tool called PanEffect. We implemented PanEffect at MaizeGDB to enable a comprehensive examination of the potential effects of coding variants across 50 maize genomes. The tool allows users to visualize over 550 million possible amino acid substitutions in the B73 maize reference genome and to observe the effects of the 2.3 million natural variations in the maize pan-genome. Each variant effect score, calculated from the Evolutionary Scale Modeling (ESM) protein language model, shows the log-likelihood ratio difference between B73 and all variants in the pan-genome. These scores are shown using heatmaps spanning benign outcomes to potential functional consequences. Additionally, PanEffect displays secondary structures and functional domains along with the variant effects, offering additional functional and structural context. Using PanEffect, researchers now have a platform to explore protein variants and identify genetic targets for crop enhancement.

AVAILABILITY: The PanEffect code is freely available on GitHub (https://github.com/Maize-Genetics-and-Genomics-Database/PanEffect). A maize implementation of PanEffect and underlying datasets are available at MaizeGDB (https://www.maizegdb.org/effect/maize/).

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid38334660, year = {2024}, author = {Bachari, A and Nassar, N and Telukutla, S and Zomer, R and Piva, TJ and Mantri, N}, title = {Evaluating the Mechanism of Cell Death in Melanoma Induced by the Cannabis Extract PHEC-66.}, journal = {Cells}, volume = {13}, number = {3}, pages = {}, doi = {10.3390/cells13030268}, pmid = {38334660}, issn = {2073-4409}, support = {Not Applicable//MGC Pharmaceuticals Ltd/ ; }, abstract = {Research suggests the potential of using cannabinoid-derived compounds to function as anticancer agents against melanoma cells. Our recent study highlighted the remarkable in vitro anticancer effects of PHEC-66, an extract from Cannabis sativa, on the MM418-C1, MM329, and MM96L melanoma cell lines. However, the complete molecular mechanism behind this action remains to be elucidated. This study aims to unravel how PHEC-66 brings about its antiproliferative impact on these cell lines, utilising diverse techniques such as real-time polymerase chain reaction (qPCR), assays to assess the inhibition of CB1 and CB2 receptors, measurement of reactive oxygen species (ROS), apoptosis assays, and fluorescence-activated cell sorting (FACS) for apoptosis and cell cycle analysis. The outcomes obtained from this study suggest that PHEC-66 triggers apoptosis in these melanoma cell lines by increasing the expression of pro-apoptotic markers (BAX mRNA) while concurrently reducing the expression of anti-apoptotic markers (Bcl-2 mRNA). Additionally, PHEC-66 induces DNA fragmentation, halting cell progression at the G1 cell cycle checkpoint and substantially elevating intracellular ROS levels. These findings imply that PHEC-66 might have potential as an adjuvant therapy in the treatment of malignant melanoma. 
However, it is essential to conduct further preclinical investigations to delve deeper into its potential and efficacy.}, } @article {pmid38332778, year = {2024}, author = {Sakurai, A and Suzuki, M and Hayashi, K and Doi, Y}, title = {Taxonomic classification of genus Aeromonas using open reading frame-based binarized structure network analysis.}, journal = {Fujita medical journal}, volume = {10}, number = {1}, pages = {8--15}, doi = {10.20407/fmj.2023-007}, pmid = {38332778}, issn = {2189-7255}, abstract = {OBJECTIVES: Taxonomic assignment based on whole-genome sequencing data facilitates clear demarcation of species within a complex genus. Here, we applied a unique pan-genome phylogenetic method, open reading frame (ORF)-based binarized structure network analysis (OSNA), for taxonomic inference of Aeromonas spp., a complex taxonomic group consisting of 30 species.

METHODS: Data from 335 publicly available Aeromonas genomes, including the reference genomes of 30 species, were used to build a phylogenetic tree using OSNA. In OSNA, whole-genome structures are expressed as binary sequences based on the presence or absence of ORFs, and a tree is generated using neighbor-net, a distance-based method for constructing phylogenetic networks from binary sequences. The tree built by OSNA was compared to that constructed by a core-genome single-nucleotide polymorphism (SNP)-based analysis. Furthermore, the orthologous average nucleotide identity (OrthoANI) values of the sequences that clustered in a single clade in the OSNA-based tree were calculated.

RESULTS: The phylogenetic tree constructed with OSNA successfully delineated the majority of species of the genus Aeromonas forming conspecific clades for individual species, which was corroborated by OrthoANI values. Moreover, the OSNA-based phylogenetic tree demonstrated high compositional similarity to the core-genome SNP-based phylogenetic tree, supported by the Fowlkes-Mallows index.

CONCLUSIONS: We propose that OSNA is a useful tool in predicting the taxonomic classification of complex bacterial genera.}, } @article {pmid38329369, year = {2024}, author = {Newcomer, EP and Fishbein, SRS and Zhang, K and Hink, T and Reske, KA and Cass, C and Iqbal, ZH and Struttmann, EL and Burnham, C-AD and Dubberke, ER and Dantas, G}, title = {Genomic surveillance of Clostridioides difficile transmission and virulence in a healthcare setting.}, journal = {mBio}, volume = {}, number = {}, pages = {e0330023}, doi = {10.1128/mbio.03300-23}, pmid = {38329369}, issn = {2150-7511}, abstract = {Clostridioides difficile infection (CDI) is a major cause of healthcare-associated diarrhea, despite the widespread implementation of contact precautions for patients with CDI. Here, we investigate strain contamination in a hospital setting and the genomic determinants of disease outcomes. Across two wards over 6 months, we selectively cultured C. difficile from patients (n = 384) and their environments. Whole-genome sequencing (WGS) of 146 isolates revealed that most C. difficile isolates were from clade 1 (131/146, 89.7%), while only one isolate of the hypervirulent ST1 was recovered. Of culture-positive admissions (n = 79), 19 (24%) patients were colonized with toxigenic C. difficile on admission to the hospital. We defined 25 strain networks at ≤2 core gene single nucleotide polymorphisms; two of these networks contain strains from different patients. Strain networks were temporally linked (P < 0.0001). To understand the genomic correlates of the disease, we conducted WGS on an additional cohort of C. difficile (n = 102 isolates) from the same hospital and confirmed that clade 1 isolates are responsible for most CDI cases. We found that while toxigenic C. difficile isolates are associated with the presence of cdtR, nontoxigenic isolates have an increased abundance of prophages. 
Our pangenomic analysis of clade 1 isolates suggests that while toxin genes (tcdABER and cdtR) were associated with CDI symptoms, they are dispensable for patient colonization. These data indicate that toxigenic and nontoxigenic C. difficile contamination persist in a hospital setting and highlight further investigation into how accessory genomic repertoires contribute to C. difficile colonization and disease.

IMPORTANCE: Clostridioides difficile infection remains a leading cause of hospital-associated diarrhea, despite increased antibiotic stewardship and transmission prevention strategies. This suggests a changing genomic landscape of C. difficile. Our study provides insight into the nature of prevalent C. difficile strains in a hospital setting and transmission patterns among carriers. Longitudinal sampling of surfaces and patient stool revealed that both toxigenic and nontoxigenic strains of C. difficile clade 1 dominate these two wards. Moreover, quantification of transmission in carriers of these clade 1 isolates underscores the need to revisit infection prevention measures in this patient group. We identified unique genetic signatures associated with virulence in this clade. Our data highlight the complexities of preventing transmission of this pathogen in a hospital setting and the need to investigate the mechanisms of in vivo persistence and virulence of prevalent lineages in the host gut microbiome.}, } @article {pmid38322985, year = {2024}, author = {Zhong, C and Hu, G and Hu, C and Xu, C and Zhang, Z and Ning, K}, title = {Comparative genomics analysis reveals genetic characteristics and nitrogen fixation profile of Bradyrhizobium.}, journal = {iScience}, volume = {27}, number = {2}, pages = {108948}, doi = {10.1016/j.isci.2024.108948}, pmid = {38322985}, issn = {2589-0042}, abstract = {Bradyrhizobium is a genus of nitrogen-fixing bacteria, with some species producing nodules in leguminous plants. 
Investigations into Bradyrhizobium have recently revealed its substantial genetic resources and agricultural benefits, but a comprehensive survey of its genetic diversity and functional properties is lacking. Using a panel of various strains (N = 278), this study performed a comparative genomics analysis to anticipate genes linked with symbiotic nitrogen fixation. Bradyrhizobium's pan-genome consisted of 84,078 gene families, containing 824 core genes and 42,409 accessory genes. Core genes were mainly involved in crucial cell processes, while accessory genes served diverse functions, including nitrogen fixation and nodulation. Three distinct genetic profiles were identified based on the presence/absence of gene clusters related to nodulation, nitrogen fixation, and secretion systems. Most Bradyrhizobium strains from soil and non-leguminous plants lacked major nif/nod genes and were evolutionarily more closely related. These findings shed light on Bradyrhizobium's genetic features for symbiotic nitrogen fixation.}, } @article {pmid38307885, year = {2024}, author = {Zheng, Z and Zhu, M and Zhang, J and Liu, X and Hou, L and Liu, W and Yuan, S and Luo, C and Yao, X and Liu, J and Yang, Y}, title = {A sequence-aware merger of genomic structural variations at population scale.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {960}, pmid = {38307885}, issn = {2041-1723}, abstract = {Merging structural variations (SVs) at the population level presents a significant challenge, yet it is essential for conducting comprehensive genotypic analyses, especially in the era of pangenomics. Here, we introduce PanPop, a tool that utilizes an advanced sequence-aware SV merging algorithm to efficiently merge SVs of various types. We demonstrate that PanPop can merge and optimize the majority of multiallelic SVs into informative biallelic variants. We show its superior precision and lower rates of missing data compared to alternative software solutions. 
Our approach not only enables the filtering of SVs by leveraging multiple SV callers for enhanced accuracy but also facilitates the accurate merging of large-scale population SVs. These capabilities of PanPop will help to accelerate future SV-related studies.}, } @article {pmid38304712, year = {2024}, author = {Chen, P and Wang, S and Li, H and Qi, X and Hou, Y and Ma, T}, title = {Comparative genomic analyses of Cutibacterium granulosum provide insights into genomic diversity.}, journal = {Frontiers in microbiology}, volume = {15}, number = {}, pages = {1343227}, doi = {10.3389/fmicb.2024.1343227}, pmid = {38304712}, issn = {1664-302X}, abstract = {Cutibacterium granulosum, a commensal bacterium found on human skin, formerly known as Propionibacterium granulosum, rarely causes infections and is generally considered non-pathogenic. Recent research has revealed the transferability of the multidrug-resistant plasmid pTZC1 between C. granulosum and Cutibacterium acnes, the latter being an opportunistic pathogen in surgical site infections. However, there is a noticeable lack of research on the genome of C. granulosum, and the genetic landscape of this species remains largely uncharted. We investigated the genomic features and evolutionary structure of C. granulosum by analyzing a total of 30 Metagenome-Assembled Genomes (MAGs) and isolate genomes retrieved from public databases, as well as those generated in this study. A pan-genome of 6,077 genes was identified for C. granulosum. Remarkably, the 'cloud genes' constituted 62.38% of the pan-genome. Genes associated with mobilome: prophages, transposons [X], defense mechanisms [V] and replication, recombination and repair [L] were enriched in the cloud genome. Phylogenomic analysis revealed two distinct mono-clades, highlighting the genomic diversity of C. granulosum. The genomic diversity was further confirmed by the distribution of Average Nucleotide Identity (ANI) values. The functional profiles analysis of C. 
granulosum unveiled a wide range of potential Antibiotic Resistance Genes (ARGs) and virulence factors, suggesting its potential tolerance to various environmental challenges. Subtype I-E of the CRISPR-Cas system was the most abundant in these genomes, a feature also detected in C. acnes genomes. Given the widespread distribution of C. granulosum strains within skin microbiome, our findings make a substantial contribution to our broader understanding of the genetic diversity, which may open new avenues for investigating the mechanisms and treatment of conditions such as acne vulgaris.}, } @article {pmid38302106, year = {2024}, author = {Hayeck, TJ and Li, Y and Mosbruger, TL and Bradfield, JP and Gleason, AG and Damianos, G and Shaw, GT and Duke, JL and Conlin, LK and Turner, TN and Fernández-Viña, MA and Sarmady, M and Monos, DS}, title = {The Impact of Patterns in Linkage Disequilibrium and Sequencing Quality on the Imprint of Balancing Selection.}, journal = {Genome biology and evolution}, volume = {}, number = {}, pages = {}, doi = {10.1093/gbe/evae009}, pmid = {38302106}, issn = {1759-6653}, abstract = {Regions under balancing selection are characterized by dense polymorphisms and multiple persistent haplotypes, along with other sequence complexities. Successful identification of these patterns depends on both the statistical approach and the quality of sequencing. To address this challenge, at first, a new statistical method called LD-ABF was developed, employing efficient Bayesian techniques to effectively test for balancing selection. LD-ABF demonstrated the most robust detection of selection in a variety of simulation scenarios, compared against a range of existing tests/tools (Tajima's D, HKA, Dng, BetaScan, and BalLerMix). 
Furthermore, the impact of the quality of sequencing on detection of balancing selection was explored, as well, using: 1) SNP genotyping and exome data, 2) targeted high-resolution HLA genotyping (IHIW), and 3) whole-genome long-read sequencing data (Pangenome). In the analysis of SNP genotyping and exome data, we identified known targets and 38 new selection signatures in genes not previously linked to balancing selection. To further investigate the impact of sequencing quality on detection of balancing selection, a detailed investigation of the MHC was performed with high-resolution HLA typing data. Higher quality sequencing revealed the HLA-DQ genes consistently demonstrated strong selection signatures otherwise not observed from the sparser SNP array and exome data. The HLA-DQ selection signature was also replicated in the Pangenome samples using considerably less samples but, with high quality long-read sequence data. The improved statistical method, coupled with higher quality sequencing, leads to more consistent identification of selection and enhanced localization of variants under selection, particularly in complex regions.}, } @article {pmid38298071, year = {2024}, author = {Lee, J and Cha, IT and Lee, KE and Son, YK and Cho, S and Seol, D}, title = {Complete genome sequence and potential pathogenic assessment of Flavobacterium plurextorum RSG-18 isolated from the gut of Schlegel's black rockfish, Sebastes schlegelii.}, journal = {Environmental microbiology reports}, volume = {}, number = {}, pages = {}, doi = {10.1111/1758-2229.13226}, pmid = {38298071}, issn = {1758-2229}, support = {NIBR202134204//National Institute of Biological Resources, Ministry of Environment/ ; }, abstract = {Flavobacterium plurextorum is a potential fish pathogen of interest, previously isolated from diseased rainbow trout (Oncorhynchus mykiss) and oomycete-infected chum salmon (Oncorhynchus keta) eggs. We report here the first complete genome sequence of F. 
plurextorum RSG-18 isolated from the gut of Schlegel's black rockfish (Sebastes schlegelii). The genome of RSG-18 consists of a circular chromosome of 5,610,911 bp with a 33.57% GC content, containing 4858 protein-coding genes, 18 rRNAs, 63 tRNAs and 1 tmRNA. A comparative analysis was conducted on 11 Flavobacterium species previously reported as pathogens or isolated from diseased fish to confirm the potential pathogenicity of RSG-18. In the SEED classification, RSG-18 was found to have 36 genes categorized in 'Virulence, Disease and Defense'. Across all Flavobacterium species, a total of 16 antibiotic resistance genes and 61 putative virulence factors were identified. All species had at least one phage region and type I, III and IX secretion systems. In pan-genomic analysis, core genes consist of genes linked to phages, integrases and matrix-tolerated elements associated with pathology. The complete genome sequence of F. plurextorum RSG-18 will serve as a foundation for future research, enhancing our understanding of Flavobacterium pathogenicity in fish and contributing to the development of effective prevention strategies.}, } @article {pmid38295902, year = {2024}, author = {Chen, Y and Xiang, G and Liu, P and Zhou, X and Guo, P and Wu, Z and Yang, J and Chen, P and Huang, J and Liao, K}, title = {Prevalence and Molecular Characteristics of Ceftazidime-avibactam Resistance among carbapenem-resistant Pseudomonas aeruginosa Clinical Isolates.}, journal = {Journal of global antimicrobial resistance}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgar.2024.01.014}, pmid = {38295902}, issn = {2213-7173}, abstract = {BACKGROUND: Resistance against ceftazidime-avibactam (CZA) in carbapenem-resistant Pseudomonas aeruginosa (CRPA) is emerging. This study was aimed at detecting the prevalence and molecular characteristics of CZA-resistant CRPA clinical isolates in Guangdong Province, China.

METHODS: The antimicrobial susceptibility profile of these strains was determined. A subset of sixteen CZA-resistant CRPA isolates was analyzed by whole genome sequencing (WGS). Genetic surroundings of carbapenem resistance genes and pan-genome-wide association analysis were further studied.

RESULTS: Of the 250 CRPA isolates, CZA resistance rate was 6.4% (16/250). The minimum inhibitory concentration (MIC) of CZA range was from 0.25 to >256 mg/L. MIC50 and MIC90 were 2/4 and 8/4 mg/L, respectively. Among the sixteen CZA-resistant CRPA strains, 31.3% (5/16) of them carried class B carbapenem resistance genes including blaIMP-4, blaIMP-45 and blaVIM-2, located on IncP-2 megaplasmids or chromosome, respectively. Pan-genome-wide association analysis of accessory genes for CZA-susceptible or -resistant CRPA isolates showed that PA1874, a hypothetical protein containing BapA prefix-like domain, was enriched in CZA-resistant group significantly.

CONCLUSIONS: Class B carbapenem resistance genes play important roles in CZA resistance. Meanwhile, PA1874 gene may be a novel mechanism involving in CZA resistance. It is necessary to continually monitor CZA-resistant CRPA isolates.}, } @article {pmid38293557, year = {2023}, author = {Kim, B and Han, SR and Lee, H and Oh, TJ}, title = {Insights into group-specific pattern of secondary metabolite gene cluster in Burkholderia genus.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1302236}, doi = {10.3389/fmicb.2023.1302236}, pmid = {38293557}, issn = {1664-302X}, abstract = {Burkholderia is a versatile strain that has expanded into several genera. It has been steadily reported that the genome features of Burkholderia exhibit activities ranging from plant growth promotion to pathogenicity across various isolation areas. The objective of this study was to investigate the secondary metabolite patterns of 366 Burkholderia species through comparative genomics. Samples were selected based on assembly quality assessment and similarity below 80% in average nucleotide identity. Duplicate samples were excluded. Samples were divided into two groups using FastANI analysis. Group A included B. pseudomallei complex. Group B included B. cepacia complex. The limitations of MLST were proposed. The detection of genes was performed, including environmental and virulence-related genes. In the pan-genome analysis, each complex possessed a similar pattern of cluster for orthologous groups. Group A (n = 185) had 14,066 cloud genes, 2,465 shell genes, 682 soft-core genes, and 2,553 strict-core genes. Group B (n = 181) had 39,867 cloud genes, 4,986 shell genes, 324 soft-core genes, 222 core genes, and 2,949 strict-core genes. AntiSMASH was employed to analyze the biosynthetic gene cluster (BGC). The results were then utilized for network analysis using BiG-SCAPE and CORASON. 
Principal component analysis was conducted and a table was constructed using the results obtained from antiSMASH. The results were divided into Group A and Group B. We expected the various species to show similar patterns of secondary metabolite gene clusters. For in-depth analysis, a network analysis of secondary metabolite gene clusters was conducted, exemplified by BiG-SCAPE analysis. Depending on the species and complex, Burkholderia possessed several kinds of siderophore. Among them, ornibactin was possessed in most Burkholderia and was clustered into 4,062 clans. There was a similar pattern of gene clusters depending on the species. NRPS_04014 belonged to siderophore BGCs including ornibactin and indigoidine. However, it was observed that each family included a similar species. This suggests that, besides siderophores being species-specific, the ornibactin gene cluster itself might also be species-specific. The results suggest that siderophores are associated with environmental adaptation, possessing a similar pattern of siderophore gene clusters among species, which could provide another perspective on species-specific environmental adaptation mechanisms.}, } @article {pmid38290434, year = {2024}, author = {Joubert, PM and Krasileva, KV}, title = {Distinct genomic contexts predict gene presence-absence variation in different pathotypes of Magnaporthe oryzae.}, journal = {Genetics}, volume = {}, number = {}, pages = {}, doi = {10.1093/genetics/iyae012}, pmid = {38290434}, issn = {1943-2631}, abstract = {Fungi use the accessory gene content of their pangenomes to adapt to their environments. While gene presence-absence variation (PAV) contributes to shaping accessory gene reservoirs, the genomic contexts that shape these events remain unclear. Since pangenome studies are typically species-wide and do not analyze different populations separately, it is yet to be uncovered whether PAV patterns and mechanisms are consistent across populations. 
Fungal plant pathogens are useful models for studying PAV because they rely on it to adapt to their hosts, and members of a species often infect distinct hosts. We analyzed gene PAV in the blast fungus, Magnaporthe oryzae (syn. Pyricularia oryzae), and found that PAV genes involved in host-pathogen and microbe-microbe interactions may drive the adaptation of the fungus to its environment. We then analyzed genomic and epigenomic features of PAV and observed that proximity to transposable elements, gene GC content, gene length, expression level in the host, and histone H3K27me3 marks were different between PAV genes and conserved genes. We used these features to construct a model that was able to predict whether a gene is likely to experience PAV with high precision (86.06%) and recall (92.88%) in M. oryzae. Finally, we found that PAV genes in the rice and wheat pathotypes of M. oryzae differed in their number and their genomic context. Our results suggest that genomic and epigenomic features of gene PAV can be used to better understand and predict fungal pangenome evolution. We also show that substantial intra-species variation can exist in these features.}, } @article {pmid38281938, year = {2024}, author = {Zaccaron, AZ and Stergiopoulos, I}, title = {Analysis of five near-complete genome assemblies of the tomato pathogen Cladosporium fulvum uncovers additional accessory chromosomes and structural variations induced by transposable elements effecting the loss of avirulence genes.}, journal = {BMC biology}, volume = {22}, number = {1}, pages = {25}, pmid = {38281938}, issn = {1741-7007}, support = {1557995//Directorate for Biological Sciences/ ; CA-D-PPA-2185-H//National Institute of Food and Agriculture/ ; }, abstract = {BACKGROUND: Fungal plant pathogens have dynamic genomes that allow them to rapidly adapt to adverse conditions and overcome host resistance. 
One way by which this dynamic genome plasticity is expressed is through effector gene loss, which enables plant pathogens to overcome recognition by cognate resistance genes in the host. However, the exact nature of these losses remains elusive in many fungi. This includes the tomato pathogen Cladosporium fulvum, which is the first fungal plant pathogen from which avirulence (Avr) genes were ever cloned and in which loss of Avr genes is often reported as a means of overcoming recognition by cognate tomato Cf resistance genes. A recent near-complete reference genome assembly of C. fulvum isolate Race 5 revealed a compartmentalized genome architecture and the presence of an accessory chromosome, thereby creating a basis for studying genome plasticity in fungal plant pathogens and its impact on avirulence genes.

RESULTS: Here, we obtained near-complete genome assemblies of four additional C. fulvum isolates. The genome assemblies had similar sizes (66.96 to 67.78 Mb), number of predicted genes (14,895 to 14,981), and estimated completeness (98.8 to 98.9%). Comparative analysis that included the genome of isolate Race 5 revealed high levels of synteny and colinearity, which extended to the density and distribution of repetitive elements and of repeat-induced point (RIP) mutations across homologous chromosomes. Nonetheless, structural variations, likely mediated by transposable elements and effecting the deletion of the avirulence genes Avr4E, Avr5, and Avr9, were also identified. The isolates further shared a core set of 13 chromosomes, but two accessory chromosomes were identified as well. Accessory chromosomes were significantly smaller in size, and one carried pseudogenized copies of two effector genes. Whole-genome alignments further revealed genomic islands of near-zero nucleotide diversity interspersed with islands of high nucleotide diversity that co-localized with repeat-rich regions. These regions were likely generated by RIP, which generally asymmetrically affected the genome of C. fulvum.

CONCLUSIONS: Our results reveal new evolutionary aspects of the C. fulvum genome and provide new insights on the importance of genomic structural variations in overcoming host resistance in fungal plant pathogens.}, } @article {pmid38279113, year = {2024}, author = {Rajput, J and Chandra, G and Jain, C}, title = {Co-linear chaining on pangenome graphs.}, journal = {Algorithms for molecular biology : AMB}, volume = {19}, number = {1}, pages = {4}, pmid = {38279113}, issn = {1748-7188}, abstract = {Pangenome reference graphs are useful in genomics because they compactly represent the genetic diversity within a species, a capability that linear references lack. However, efficiently aligning sequences to these graphs with complex topology and cycles can be challenging. The seed-chain-extend based alignment algorithms use co-linear chaining as a standard technique to identify a good cluster of exact seed matches that can be combined to form an alignment. Recent works show how the co-linear chaining problem can be efficiently solved for acyclic pangenome graphs by exploiting their small width and how incorporating gap cost in the scoring function improves alignment accuracy. However, it remains open on how to effectively generalize these techniques for general pangenome graphs which contain cycles. Here we present the first practical formulation and an exact algorithm for co-linear chaining on cyclic pangenome graphs. We rigorously prove the correctness and computational complexity of the proposed algorithm. We evaluate the empirical performance of our algorithm by aligning simulated long reads from the human genome to a cyclic pangenome graph constructed from 95 publicly available haplotype-resolved human genome assemblies. While the existing heuristic-based algorithms are faster, the proposed algorithm provides a significant advantage in terms of accuracy. 
Implementation (https://github.com/at-cg/PanAligner).}, } @article {pmid38278862, year = {2024}, author = {Mondol, SM and Islam, I and Islam, MR and Shakil, SK and Rakhi, NN and Mustary, JF and {Amiruzzaman} and Gomes, DJ and Shahjalal, HM and Rahaman, MM}, title = {Genomic landscape of NDM-1 producing multidrug-resistant Providencia stuartii causing burn wound infections in Bangladesh.}, journal = {Scientific reports}, volume = {14}, number = {1}, pages = {2246}, pmid = {38278862}, issn = {2045-2322}, support = {LS2019935//Ministry of Education, Government of the People's Republic of Bangladesh/ ; }, abstract = {The increasing antimicrobial resistance in Providencia stuartii (P. stuartii) worldwide, particularly concerning for immunocompromised and burn patients, has raised concern in Bangladesh, where the significance of this infectious opportunistic pathogen had been previously overlooked, prompting a need for investigation. The two strains of P. stuartii (P. stuartii SHNIBPS63 and P. stuartii SHNIBPS71) isolated from wound swab of two critically injured burn patients were found to be multidrug-resistant and P. stuartii SHNIBPS63 showed resistance to all the 22 antibiotics tested as well as revealed the co-existence of blaVEB-6 (Class A), blaNDM-1 (Class B), blaOXA-10 (Class D) beta lactamase genes. Complete resistance to carbapenems through the production of NDM-1, is indicative of an alarming situation as carbapenems are considered to be the last line antibiotic to combat this pathogen. Both isolates displayed strong biofilm-forming abilities and exhibited resistance to copper, zinc, and iron, in addition to carrying multiple genes associated with metal resistance and the formation of biofilms. The study also encompassed a pangenome analysis utilizing a dataset of eighty-six publicly available P. stuartii genomes (n = 86), revealing evidence of an open or expanding pangenome for P. stuartii. Also, an extensive genome-wide analysis of all the P. 
stuartii genomes revealed a concerning global prevalence of diverse antimicrobial resistance genes, with a particular alarm raised over the abundance of carbapenem resistance gene blaNDM-1. Additionally, this study highlighted the notable genetic diversity within P. stuartii, significant information about phylogenomic relationships and ancestry, as well as potential for cross-species transmission, raising important implications for public health and microbial adaptation across different environments.}, } @article {pmid38271481, year = {2024}, author = {Barbitoff, YA and Ushakov, MO and Lazareva, TE and Nasykhova, YA and Glotov, AS and Predeus, AV}, title = {Bioinformatics of germline variant discovery for rare disease diagnostics: current approaches and remaining challenges.}, journal = {Briefings in bioinformatics}, volume = {25}, number = {2}, pages = {}, doi = {10.1093/bib/bbad508}, pmid = {38271481}, issn = {1477-4054}, support = {075-15-2021-1058//Ministry of Science and Higher Education of Russian Federation/ ; }, abstract = {Next-generation sequencing (NGS) has revolutionized the field of rare disease diagnostics. Whole exome and whole genome sequencing are now routinely used for diagnostic purposes; however, the overall diagnosis rate remains lower than expected. In this work, we review current approaches used for calling and interpretation of germline genetic variants in the human genome, and discuss the most important challenges that persist in the bioinformatic analysis of NGS data in medical genetics. We describe and attempt to quantitatively assess the remaining problems, such as the quality of the reference genome sequence, reproducible coverage biases, or variant calling accuracy in complex regions of the genome. We also discuss the prospects of switching to the complete human genome assembly or the human pan-genome and important caveats associated with such a switch. 
We touch on arguably the hardest problem of NGS data analysis for medical genomics, namely, the annotation of genetic variants and their subsequent interpretation. We highlight the most challenging aspects of annotation and prioritization of both coding and non-coding variants. Finally, we demonstrate the persistent prevalence of pathogenic variants in the coding genome, and outline research directions that may enhance the efficiency of NGS-based disease diagnostics.}, } @article {pmid38270699, year = {2024}, author = {Singh, S and Singh, R and Priyadarsini, S and Ola, AL}, title = {Genomics empowering conservation action and improvement of celery in the face of climate change.}, journal = {Planta}, volume = {259}, number = {2}, pages = {42}, pmid = {38270699}, issn = {1432-2048}, abstract = {Integration of genomic approaches like whole genome sequencing, functional genomics, evolutionary genomics, and CRISPR/Cas9-based genome editing has accelerated the improvement of crop plants including leafy vegetables like celery in the face of climate change. The anthropogenic climate change is a real peril to the existence of life forms on our planet, including human and plant life. Climate change is predicted to be a significant threat to biodiversity and food security in the coming decades and is rapidly transforming global farming systems. To avoid the ghastly future in the face of climate change, the elucidation of shifts in the geographical range of plant species, species adaptation, and evolution is necessary for plant scientists to develop climate-resilient strategies. In the post-genomics era, the increasing availability of genomic resources and integration of multifaceted genomics elements is empowering biodiversity conservation action, restoration efforts, and identification of genomic regions adaptive to climate change. 
Genomics has accelerated the true characterization of crop wild relatives, genomic variations, and the development of climate-resilient varieties to ensure food security for 10 billion people by 2050. In this review, we have summarized the applications of multifaceted genomic tools, like conservation genomics, whole genome sequencing, functional genomics, genome editing, pangenomics, in the conservation and adaptation of plant species with a focus on celery, an aromatic and medicinal Apiaceae vegetable. We focus on how conservation scientists can utilize genomics and genomic data in conservation and improvement.}, } @article {pmid38268053, year = {2024}, author = {Uruén, C and Fernandez, A and Arnal, JL and Del Pozo, M and Amoribieta, MC and de Blas, I and Jurado, P and Calvo, JH and Gottschalk, M and González-Vázquez, LD and Arenas, M and Marín, CM and Arenas, J}, title = {Genomic and phenotypic analysis of invasive Streptococcus suis isolated in Spain reveals genetic diversification and associated virulence traits.}, journal = {Veterinary research}, volume = {55}, number = {1}, pages = {11}, pmid = {38268053}, issn = {1297-9716}, support = {PID2020-114617RB-100//Ministerio de Ciencia e Innovación/Agencia Española de investigación/ ; LMP58_21//Departamento de Educación, Cultura y Deporte, Gobierno de Aragón/ ; }, abstract = {Streptococcus suis is a zoonotic pathogen that causes a major health problem in the pig production industry worldwide. Spain is one of the largest pig producers in the world. This work aimed to investigate the genetic and phenotypic features of invasive S. suis isolates recovered in Spain. A panel of 156 clinical isolates recovered from 13 Autonomous Communities, representing the major pig producers, were analysed. MLST and serotyping analysis revealed that most isolates (61.6%) were assigned to ST1 (26.3%), ST123 (18.6%), ST29 (9.6%), and ST3 (7.1%). 
Interestingly, 34 new STs were identified, indicating the emergence of novel genetic lineages. Serotypes 9 (27.6%) and 1 (21.8%) prevailed, followed by serotypes 7 (12.8%) and 2 (12.2%). Analysis of 13 virulence-associated genes showed significant associations between ST, serotype, virulence patterns, and clinical features, evidencing particular virulence traits associated with genetic clusters. The pangenome was generated, and the core genome was distributed in 7 Bayesian groups where each group included a variable set of over- and under-represented genes of different categories. The study provides comprehensive data and knowledge to improve the design of new vaccines, antimicrobial treatments, and bacterial typing approaches.}, } @article {pmid38265421, year = {2024}, author = {Kothe, CI and Monnet, C and Irlinger, F and Virsolvy, M and Frühling, A and Neumann-Schaal, M and Wolf, J and Renault, P}, title = {Halomonas citrativorans sp. nov., Halomonas casei sp. nov. and Halomonas colorata sp. nov., isolated from French cheese rinds.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {74}, number = {1}, pages = {}, doi = {10.1099/ijsem.0.006234}, pmid = {38265421}, issn = {1466-5034}, abstract = {Eight Gram-stain-negative bacterial strains were isolated from cheese rinds sampled in France. On the basis of 16S rRNA gene sequence analysis, all isolates were assigned to the genus Halomonas. Phylogenetic investigations, including 16S rRNA gene studies, multilocus sequence analysis, reconstruction of a pan-genome phylogenetic tree with the concatenated core-genome content and average nucleotide identity (ANI) calculations, revealed that they constituted three novel and well-supported clusters. 
The closest relative species, determined using the whole-genome sequences of the strains, were Halomonas zhanjiangensis for two groups of cheese strains, sharing 82.4 and 93.1 % ANI, and another cluster sharing 92.2 % ANI with the Halomonas profundi type strain. The strains isolated herein differed from the previously described species by ANI values <95 % and several biochemical, enzymatic and colony characteristics. The results of phenotypic, phylogenetic and chemotaxonomic analyses indicated that the isolates belonged to three novel Halomonas species, for which the names Halomonas citrativorans sp. nov., Halomonas casei sp. nov. and Halomonas colorata sp. nov. are proposed, with isolates FME63[T] (=DSM 113315[T]=CIRM-BIA2430[T]=CIP 111880[T]=LMG 32013[T]), FME64[T] (=DSM 113316[T]=CIRM-BIA2431[T]=CIP 111877[T]=LMG 32015[T]) and FME66[T] (=DSM 113318[T]=CIRM-BIA2433[T]=CIP 111876[T]=LMG 32014[T]) as type strains, respectively.}, } @article {pmid38261993, year = {2024}, author = {Teyssonniere, EM and Shichino, Y and Mito, M and Friedrich, A and Iwasaki, S and Schacherer, J}, title = {Translation variation across genetic backgrounds reveals a post-transcriptional buffering signature in yeast.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkae030}, pmid = {38261993}, issn = {1362-4962}, support = {772505/ERC_/European Research Council/International ; S10 OD018174/CD/ODCDC CDC HHS/United States ; }, abstract = {Gene expression is known to vary among individuals, and this variability can impact the phenotypic diversity observed in natural populations. While the transcriptome and proteome have been extensively studied, little is known about the translation process itself. Here, we therefore performed ribosome and transcriptomic profiling on a genetically and ecologically diverse set of natural isolates of the Saccharomyces cerevisiae yeast. 
Interestingly, we found that the Euclidean distances between each profile and the expression fold changes in each pairwise isolate comparison were higher at the transcriptomic level. This observation clearly indicates that the transcriptional variation observed in the different isolates is buffered through a phenomenon known as post-transcriptional buffering at the translation level. Furthermore, this phenomenon seemed to have a specific signature by preferentially affecting essential genes as well as genes involved in complex-forming proteins, and low transcribed genes. We also explored the translation of the S. cerevisiae pangenome and found that the accessory genes related to introgression events displayed similar transcription and translation levels as the core genome. By contrast, genes acquired through horizontal gene transfer events tended to be less efficiently translated. Together, our results highlight both the extent and signature of the post-transcriptional buffering.}, } @article {pmid38260597, year = {2024}, author = {Villani, F and Guarracino, A and Ward, RR and Green, T and Emms, M and Pravenec, M and Prins, P and Garrison, E and Williams, RW and Chen, H and Colonna, V}, title = {Pangenome reconstruction in rats enhances genotype-phenotype mapping and novel variant discovery.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2024.01.10.575041}, pmid = {38260597}, abstract = {The HXB/BXH family of recombinant inbred rat strains is a unique genetic resource that has been extensively phenotyped over 25 years, resulting in a vast dataset of quantitative molecular and physiological phenotypes. We built a pangenome graph from 10x Genomics linked-read data for 31 recombinant inbred rats to study genetic variation and association mapping. 
The pangenome length was on average 2.4 times greater than the corresponding length of the reference mRatBN7.2, confirming the capture of substantial additional variation. We validated variants in challenging regions, including complex structural variants resolving into multiple haplotypes. Phenome-wide association analysis of validated SNPs uncovered variants associated with glucose/insulin levels and hippocampal gene expression. We propose an interaction between Pirl1l1, Cromogranine expression, TNF-α levels, and insulin regulation. This study demonstrates the utility of linked-read pangenomes for comprehensive variant detection and mapping phenotypic diversity in a widely used rat genetic reference panel.}, } @article {pmid38259089, year = {2024}, author = {Chen, F and Yin, Y and Chen, H and Wang, R and Wang, S and Wang, H}, title = {Global genetic diversity and Asian clades evolution: a phylogeographic study of Staphylococcus aureus sequence type 5.}, journal = {Antimicrobial agents and chemotherapy}, volume = {}, number = {}, pages = {e0117523}, doi = {10.1128/aac.01175-23}, pmid = {38259089}, issn = {1098-6596}, abstract = {Staphylococcus aureus sequence type (ST) 5 has spread worldwide; however, phylogeographic studies on the evolution of global phylogenetic and Asian clades of ST5 are lacking. This study included 368 ST5 genome sequences, including 111 newly generated sequences. Primary phylogenetic analysis suggested that there are five clades, and geographical clustering of ST5 methicillin-resistant S. aureus (MRSA) was linked to the acquisition of S. aureus pathogenicity islands (SaPIs; enterotoxin gene island) and integration of the prophage φSa3. The most recent common ancestor of global S. aureus ST5 dates back to the mid-1940s, coinciding with the clinical introduction of penicillin. Bayesian phylogeographic inference allowed to ancestrally trace the Asian ST5 MRSA clade to Japan, which may have spread to major cities in China and Korea in the 1990s. 
Based on a pan-genome-wide association study, the emergence of Asian ST5 clades was attributed to the gain of prophages, SaPIs, and plasmids, as well as the coevolution of resistance genes. Clade IV displayed greater genomic diversity than the Asian MRSA clades. Collectively, our study provides in-depth insights into the global evolution of S. aureus ST5 mainly in China and the United States and reveals that different S. aureus ST5 clades have arisen independently in different parts of the world, with limited geographic dispersal across continents.}, } @article {pmid38257915, year = {2023}, author = {Afordoanyi, DM and Akosah, YA and Shnakhova, L and Saparmyradov, K and Diabankana, RGC and Validov, S}, title = {Biotechnological Key Genes of the Rhodococcus erythropolis MGMM8 Genome: Genes for Bioremediation, Antibiotics, Plant Protection, and Growth Stimulation.}, journal = {Microorganisms}, volume = {12}, number = {1}, pages = {}, doi = {10.3390/microorganisms12010088}, pmid = {38257915}, issn = {2076-2607}, support = {RF-1930.61321X0001/15.IP.21.0020//Ministry of Education 362 and Science of the Russian Federation/ ; }, abstract = {Anthropogenic pollution, including residues from the green revolution initially aimed at addressing food security and healthcare, has paradoxically exacerbated environmental challenges. The transition towards comprehensive green biotechnology and bioremediation, achieved with lower financial investment, hinges on microbial biotechnology, with the Rhodococcus genus emerging as a promising contender. The significance of fully annotating genome sequences lies in comprehending strain constituents, devising experimental protocols, and strategically deploying these strains to address pertinent issues using pivotal genes. This study revolves around Rhodococcus erythropolis MGMM8, an associate of winter wheat plants in the rhizosphere. 
Through the annotation of its chromosomal genome and subsequent comparison with other strains, its potential applications were explored. Using the antiSMASH server, 19 gene clusters were predicted, encompassing genes responsible for antibiotics and siderophores. Antibiotic resistance evaluation via the Comprehensive Antibiotic Resistance Database (CARD) identified five genes (vanW, vanY, RbpA, iri, and folC) that were parallel to strain CCM2595. Leveraging the NCBI Prokaryotic Genome Annotation Pipeline (PGAP) for biodegradation, heavy metal resistance, and remediation genes, the presence of chlorimuron-ethyl, formaldehyde, benzene-desulfurization degradation genes, and heavy metal-related genes (ACR3, arsC, corA, DsbA, modA, and recG) in MGMM8 was confirmed. Furthermore, quorum-quenching signal genes, critical for curbing biofilm formation and virulence elicited by quorum-sensing in pathogens, were also discerned within MGMM8's genome. In light of these predictions, the novel isolate MGMM8 warrants phenotypic assessment to gauge its potential in biocontrol and bioremediation. This evaluation extends to isolating active compounds for potential antimicrobial activities against pathogenic microorganisms. 
The comprehensive genome annotation process has facilitated the genetic characterization of MGMM8 and has solidified its potential as a biotechnological strain to address global anthropogenic predicaments.}, } @article {pmid38257891, year = {2023}, author = {Godoy, M and Montes de Oca, M and Suarez, R and Martinez, A and Pontigo, JP and Caro, D and Kusch, K and Coca, Y and Bohle, H and Bayliss, S and Kibenge, M and Kibenge, F}, title = {Genomics of Re-Emergent Aeromonas salmonicida in Atlantic Salmon Outbreaks.}, journal = {Microorganisms}, volume = {12}, number = {1}, pages = {}, doi = {10.3390/microorganisms12010064}, pmid = {38257891}, issn = {2076-2607}, abstract = {Furunculosis, caused by Aeromonas salmonicida, poses a significant threat to both salmonid and non-salmonid fish in diverse aquatic environments. This study explores the genomic intricacies of re-emergent A. salmonicida outbreaks in Atlantic salmon (Salmo salar). Previous clinical cases have exhibited pathological characteristics, such as periorbital hemorrhages and gastrointestinal abnormalities. Genomic sequencing of three Chilean isolates (ASA04, ASA05, and CIBA_5017) and 25 previously described genomes determined the pan-genome, phylogenomics, insertion sequences, and restriction-modification systems. Unique gene families have contributed to an improved understanding of the psychrophilic and mesophilic clades, while phylogenomic analysis has been used to identify mesophilic and psychrophilic strains, thereby further differentiating between typical and atypical psychrophilic isolates. Diverse insertion sequences and restriction-modification patterns have highlighted genomic structural differences, and virulence factor predictions can emphasize exotoxin disparities, especially between psychrophilic and mesophilic strains. Thus, a novel plasmid was characterized which emphasized the role of plasmids in virulence and antibiotic resistance. 
The analysis of antibiotic resistance factors revealed resistance against various drug classes in Chilean strains. Overall, this study elucidates the genomic dynamics of re-emergent A. salmonicida and provides novel insights into their virulence, antibiotic resistance, and population structure.}, } @article {pmid38254124, year = {2024}, author = {Fan, J and Khan, J and Singh, NP and Pibiri, GE and Patro, R}, title = {Fulgor: a fast and compact k-mer index for large-scale matching and color queries.}, journal = {Algorithms for molecular biology : AMB}, volume = {19}, number = {1}, pages = {3}, pmid = {38254124}, issn = {1748-7188}, support = {R01HG009937/NH/NIH HHS/United States ; }, abstract = {The problem of sequence identification or matching-determining the subset of reference sequences from a given collection that are likely to contain a short, queried nucleotide sequence-is relevant for many important tasks in Computational Biology, such as metagenomics and pangenome analysis. Due to the complex nature of such analyses and the large scale of the reference collections a resource-efficient solution to this problem is of utmost importance. This poses the threefold challenge of representing the reference collection with a data structure that is efficient to query, has light memory usage, and scales well to large collections. To solve this problem, we describe an efficient colored de Bruijn graph index, arising as the combination of a k-mer dictionary with a compressed inverted index. The proposed index takes full advantage of the fact that unitigs in the colored compacted de Bruijn graph are monochromatic (i.e., all k-mers in a unitig have the same set of references of origin, or color). Specifically, the unitigs are kept in the dictionary in color order, thereby allowing for the encoding of the map from k-mers to their colors in as little as 1 + o(1) bits per unitig. Hence, one color per unitig is stored in the index with almost no space/time overhead. 
By combining this property with simple but effective compression methods for integer lists, the index achieves very small space. We implement these methods in a tool called Fulgor, and conduct an extensive experimental analysis to demonstrate the improvement of our tool over previous solutions. For example, compared to Themisto—the strongest competitor in terms of index space vs. query time trade-off—Fulgor requires significantly less space (up to 43% less space for a collection of 150,000 Salmonella enterica genomes), is at least twice as fast for color queries, and is 2-6× faster to construct.}, } @article {pmid38253726, year = {2024}, author = {Jeong, J and Ahn, S and Truong, TC and Kim, JH and Weerawongwiwat, V and Lee, JS and Yoon, JH and Sukhoom, A and Kim, W}, title = {Description of Mycolicibacterium arenosum sp. nov. Isolated from Coastal Sand on the Yellow Sea Coast.}, journal = {Current microbiology}, volume = {81}, number = {3}, pages = {73}, pmid = {38253726}, issn = {1432-0991}, support = {NIBR202102205//National Institute of Biological Resources/ ; NRF-2021R1C1C2003223//National Research Foundation of Korea/ ; 2017//Chung-Ang University/ ; }, abstract = {A Gram-staining-positive, aerobic, non-spore-forming bacterium was isolated from coastal sand samples from Incheon in the Republic of Korea and designated as strain CAU 1645[T]. The optimum conditions for growth were observed at 30 °C in growth media containing 1% (w/v) NaCl at pH 9.0. The predominant respiratory quinone was MK-9 and the major fatty acids were C16:0, C17:1 w7c, and summed feature 7. Similarly, the 16S rRNA gene sequence exhibited the highest similarity with Mycolicibacterium bacteremicum DSM 45578[T] and Mycolicibacterium neoaurum JCM 6365[T], both of which exhibited similarity rates of 97.2%. The genomic DNA G+C content was 68.2%. The whole genome of strain CAU 1645[T] was obtained and annotated using the RAST server. 
The pan-genome analysis was determined using Prokka, Roary, and Phandango. In the pan-genome analysis, the strain CAU 1645[T] shared 40 core genes with closely related Mycolicibacterium species, including the AcpM gene, the meromycolate extension acyl carrier protein involved in forming impermeable cell walls in mycobacteria. Therefore, our findings demonstrated that the isolate represents a novel species of the genus Mycolicibacterium, for which we propose the name Mycolicibacterium arenosum sp. nov. The type strain is CAU 1645[T] (= KCTC 49724[T] = MCCC 1K07087[T]).}, } @article {pmid38253606, year = {2024}, author = {Groza, C and Schwendinger-Schreck, C and Cheung, WA and Farrow, EG and Thiffault, I and Lake, J and Rizzo, WB and Evrony, G and Curran, T and Bourque, G and Pastinen, T}, title = {Pangenome graphs improve the analysis of structural variants in rare genetic diseases.}, journal = {Nature communications}, volume = {15}, number = {1}, pages = {657}, pmid = {38253606}, issn = {2041-1723}, abstract = {Rare DNA alterations that cause heritable diseases are only partially resolvable by clinical next-generation sequencing due to the difficulty of detecting structural variation (SV) in all genomic contexts. Long-read, high fidelity genome sequencing (HiFi-GS) detects SVs with increased sensitivity and enables assembling personal and graph genomes. We leverage standard reference genomes, public assemblies (n = 94) and a large collection of HiFi-GS data from a rare disease program (Genomic Answers for Kids, GA4K, n = 574 assemblies) to build a graph genome representing a unified SV callset in GA4K, identify common variation and prioritize SVs that are more likely to cause genetic disease (MAF < 0.01). Using graphs, we obtain a higher level of reproducibility than the standard reference approach. We observe over 200,000 SV alleles unique to GA4K, including nearly 1000 rare variants that impact coding sequence. 
With improved specificity for rare SVs, we isolate 30 candidate SVs in phenotypically prioritized genes, including known disease SVs. We isolate a novel diagnostic SV in KMT2E, demonstrating use of personal assemblies coupled with pangenome graphs for rare disease genomics. The community may interrogate our pangenome with additional assemblies to discover new SVs within the allele frequency spectrum relevant to genetic diseases.}, } @article {pmid38249481, year = {2023}, author = {Deng, Y and Jiang, ZM and Han, XF and Su, J and Yu, LY and Liu, WH and Zhang, YQ}, title = {Corrigendum: Pangenome analysis of the genus Herbiconiux and proposal of four new species associated with Chinese medicinal plants.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1295710}, doi = {10.3389/fmicb.2023.1295710}, pmid = {38249481}, issn = {1664-302X}, abstract = {[This corrects the article DOI: 10.3389/fmicb.2023.1119226.].}, } @article {pmid38246550, year = {2024}, author = {Song, Z and Ge, Y and Yu, X and Liu, R and Liu, C and Cheng, K and Guo, L and Yao, S}, title = {Development of a SNP-based strain-identified method for Streptococcus thermophilus CICC 6038 and Lactobacillus delbrueckii ssp. bulgaricus CICC 6047 using pan-genomics analysis.}, journal = {Journal of dairy science}, volume = {}, number = {}, pages = {}, doi = {10.3168/jds.2023-23655}, pmid = {38246550}, issn = {1525-3198}, abstract = {The health benefits conferred by probiotics is specific to individual probiotic strains, highlighting the importance of identifying specific strains for research and production purposes. Streptococcus thermophilus CICC 6038 and Lactobacillus delbrueckii ssp. bulgaricus CICC 6047 are exceedingly valuable for commercial use with an excellent mixed-culture fermentation. To differentiate these 2 strains from other S. thermophilus and L. delbrueckii ssp. bulgaricus, a specific, sensitive, accurate, rapid, convenient, and cost-effective method is required. 
In this study, we conducted a pan-genome analysis of S. thermophilus and L. delbrueckii ssp. bulgaricus to identify species-specific core genes, along with strain-specific single-nucleotide polymorphisms (SNPs). These genes were used to develop suitable PCR primers, and the conformity of sequence length and unique SNPs was confirmed by sequencing for qualitative identification at the strain level. The results demonstrated that SNPs analysis of PCR products derived from these primers could distinguish CICC 6038 and CICC 6047 accurately and reproducibly from the other strains of S. thermophilus and L. delbrueckii ssp. bulgaricus, respectively. The strain-specific PCR method based on SNPs herein is universally applicable for probiotics identification. It offers valuable insights into identifying probiotics at the strain level that is fit-for-purpose in quality control and compliance assessment of commercial dairy products.}, } @article {pmid38238664, year = {2024}, author = {Peng, M and Lin, W and Zhou, A and Jiang, Z and Zhou, F and Wang, Z}, title = {High genetic diversity and different type VI secretion systems in Enterobacter species revealed by comparative genomics analysis.}, journal = {BMC microbiology}, volume = {24}, number = {1}, pages = {26}, pmid = {38238664}, issn = {1471-2180}, support = {32200094//National Natural Science Foundation of China/ ; PT012201//Hubei Key Laboratory of Biological Resources Protection and Utilization (Hubei Minzu University)/ ; 2022CFB674//Natural Science Foundation of Hubei Province/ ; }, abstract = {The human-pathogenic Enterobacter species are widely distributed in diverse environmental conditions, however, the understanding of the virulence factors and genetic variations within the genus is very limited. 
In this study, we performed comparative genomics analysis of 49 strains originated from diverse niches and belonged to eight Enterobacter species, in order to further understand the mechanism of adaption to the environment in Enterobacter. The results showed that they had an open pan-genome and high genomic diversity which allowed adaptation to distinctive ecological niches. We found the number of secretion systems was the highest among various virulence factors in these Enterobacter strains. Three types of T6SS gene clusters including T6SS-A, T6SS-B and T6SS-C were detected in most Enterobacter strains. T6SS-A and T6SS-B shared 13 specific core genes, but they had different gene structures, suggesting they probably have different biological functions. Notably, T6SS-C was restricted to E. cancerogenus. We detected a T6SS gene cluster, highly similar to T6SS-C (91.2%), in the remote related Citrobacter rodentium, suggesting that this unique gene cluster was probably acquired by horizontal gene transfer. The genomes of Enterobacter strains possess high genetic diversity, limited number of conserved core genes, and multiple copies of T6SS gene clusters with differentiated structures, suggesting that the origins of T6SS were not by duplication instead by independent acquisition. 
These findings provide valuable information for better understanding of the functional features of Enterobacter species and their evolutionary relationships.}, } @article {pmid38230932, year = {2024}, author = {Silva-Pereira, TT and Soler-Camargo, NC and Guimarães, AMS}, title = {Diversification of gene content in the Mycobacterium tuberculosis complex is determined by phylogenetic and ecological signatures.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0228923}, doi = {10.1128/spectrum.02289-23}, pmid = {38230932}, issn = {2165-0497}, abstract = {In this study, we analyzed the gene content of different ecotypes of the Mycobacterium tuberculosis complex (MTBC), the pathogens of tuberculosis. We found that changes in their gene content are associated with their ecological features, such as host preference. Gene loss was identified as the primary driver of these changes, which can vary even among different strains of the same ecotype. Our study also revealed that the gene content relatedness of these bacteria does not always mirror their evolutionary relationships. In addition, some genes of virulence can be variably lost among strains of the same MTBC ecotype, likely helping them to evade the immune system. Overall, our study highlights the importance of understanding how gene loss can lead to new adaptations in these bacteria and how different selective pressures may influence their genetic makeup.}, } @article {pmid38229335, year = {2024}, author = {Venkatachalam, S and Jabir, T and Vipindas, PV and Krishnan, KP}, title = {Ecological significance of Candidatus ARS69 and Gemmatimonadota in the Arctic glacier foreland ecosystems.}, journal = {Applied microbiology and biotechnology}, volume = {108}, number = {1}, pages = {128}, pmid = {38229335}, issn = {1432-0614}, abstract = {The Gemmatimonadota phylum has been widely detected in diverse natural environments, yet their specific ecological roles in many habitats remain poorly investigated. 
Similarly, the Candidatus ARS69 phylum has been identified only in a few habitats, and literature on their metabolic functions is relatively scarce. In the present study, we investigated the ecological significance of phyla Ca. ARS69 and Gemmatimonadota in the Arctic glacier foreland (GF) ecosystems through genome-resolved metagenomics. We have reconstructed the first high-quality metagenome-assembled genome (MAG) belonging to Ca. ARS69 and 12 other MAGs belonging to phylum Gemmatimonadota from the three different Arctic GF samples. We further elucidated these two groups phylogenetic lineage and their metabolic function through phylogenomic and pangenomic analysis. The analysis showed that all the reconstructed MAGs potentially belonged to novel species. The MAGs belonged to Ca. ARS69 consist about 8296 gene clusters, of which only about 8% of single-copy core genes (n = 980) were shared among them. The study also revealed the potential ecological role of Ca. ARS69 is associated with carbon fixation, denitrification, sulfite oxidation, and reduction biochemical processes in the GF ecosystems. Similarly, the study demonstrates the widespread distribution of different classes of Gemmatimonadota across wide ranges of ecosystems and their metabolic functions, including in the polar region. KEY POINTS: • Glacier foreland ecosystems act as a natural laboratory to study microbial community structure. • We have reconstructed 13 metagenome-assembled genomes from the soil samples. • All the reconstructed MAGs belonged to novel species with different metabolic processes. • Ca. 
ARS69 and Gemmatimonadota MAGs were found to participate in carbon fixation and denitrification processes.}, } @article {pmid38225047, year = {2024}, author = {Han, DM and Baek, JH and Choi, DG and Jeon, MS and Eyun, SI and Jeon, CO}, title = {Comparative pangenome analysis of Aspergillus flavus and Aspergillus oryzae reveals their phylogenetic, genomic, and metabolic homogeneity.}, journal = {Food microbiology}, volume = {119}, number = {}, pages = {104435}, doi = {10.1016/j.fm.2023.104435}, pmid = {38225047}, issn = {1095-9998}, abstract = {Aspergillus flavus and Aspergillus oryzae are closely related fungal species with contrasting roles in food safety and fermentation. To comprehensively investigate their phylogenetic, genomic, and metabolic characteristics, we conducted an extensive comparative pangenome analysis using complete, dereplicated genome sets for both species. Phylogenetic analyses, employing both the entirety of the identified single-copy orthologous genes and six housekeeping genes commonly used for fungal classification, did not reveal clear differentiation between A. flavus and A. oryzae genomes. Upon analyzing the aflatoxin biosynthesis gene clusters within the genomes, we observed that non-aflatoxin-producing strains were dispersed throughout the phylogenetic tree, encompassing both A. flavus and A. oryzae strains. This suggests that aflatoxin production is not a distinguishing trait between the two species. Furthermore, A. oryzae and A. flavus strains displayed remarkably similar genomic attributes, including genome sizes, gene contents, and G + C contents, as well as metabolic features and pathways. The profiles of CAZyme genes and secondary metabolite biosynthesis gene clusters within the genomes of both species further highlight their similarity. Collectively, these findings challenge the conventional differentiation of A. flavus and A. 
oryzae as distinct species and highlight their phylogenetic, genomic, and metabolic homogeneity, potentially indicating that they may indeed belong to the same species.}, } @article {pmid38224489, year = {2024}, author = {Wendisch, VF and Brito, LF and Passaglia, LMP}, title = {Genome-based analyses to learn from and about Paenibacillus sonchi genomovar Riograndensis SBR5T.}, journal = {Genetics and molecular biology}, volume = {46}, number = {3 Suppl 1}, pages = {e20230115}, doi = {10.1590/1678-4685-GMB-2023-0115}, pmid = {38224489}, issn = {1415-4757}, abstract = {Paenibacillus sonchi genomovar Riograndensis SBR5T is a plant growth-promoting rhizobacterium (PGPR) isolated in the Brazilian state of Rio Grande do Sul from the rhizosphere of Triticum aestivum. It fixes nitrogen, produces siderophores as well as the phytohormone indole-3-acetic acid, solubilizes phosphate and displays antagonist activity against Listeria monocytogenes and Pectobacterium carotovorum. Comprehensive omics analysis and the development of genetic tools are key to characterizing and engineering such non-model microorganisms. Therefore, the complete genome of SBR5T was sequenced, and shown to encode 6,705 proteins, 87 tRNAs, and 27 rRNAs and it enabled a landscape transcriptome analysis that unveiled conserved transcriptional and translational patterns and characterized operon structures and riboswitches. The pangenome of P. sonchi species is open with a stable core pangenome. At the same time, the analysis of genes coding for nitrogenases revealed that the trait of nitrogen fixation is sparse within the Paenibacillaceae family and the presence of Fe-only nitrogenase in the P. sonchi group was exclusive to SBR5T. The development of genetic tools for SBR5T enabled genetic transformation, plasmid construction for constitutive and inducible gene expression, and gene repression using the CRISPRi system. Altogether, the work with P. 
sonchi can guide the study of non-model bacteria with economic potential.}, } @article {pmid38217963, year = {2024}, author = {Monterrubio-López, GP and Llamas-Monroy, JL and Martínez-Gómez, ÁA and Delgadillo-Gutiérrez, K}, title = {Novel vaccine candidates of Bordetella pertussis identified by reverse vaccinology.}, journal = {Biologicals : journal of the International Association of Biological Standardization}, volume = {85}, number = {}, pages = {101740}, doi = {10.1016/j.biologicals.2023.101740}, pmid = {38217963}, issn = {1095-8320}, abstract = {Whooping cough is a disease caused by Bordetella pertussis, whose morbidity has increased, motivating the improvement of current vaccines. Reverse vaccinology is a strategy that helps identify proteins with good characteristics fast and with fewer resources. In this work, we applied reverse vaccinology to study the B. pertussis proteome and pangenome with several in-silico tools. We analyzed the B. pertussis Tohama I proteome with NERVE software and compared 234 proteins with B. parapertussis, B. bronchiseptica, and B. holmesii. VaxiJen was used to calculate an antigenicity value; our threshold was 0.6, selecting 84 proteins. The candidates were depurated and grouped in eight family proteins to select representative candidates, according to bibliographic information and their immunological response predicted with ABCpred, Bcepred, IgPred, and C-ImmSim. Additionally, a pangenome study was conducted with 603 B. pertussis strains and PanRV software, identifying 3421 core proteins that were analyzed to select the best candidates. 
Finally, we selected 15 proteins from the proteome study and seven proteins from the pangenome analysis as good vaccine candidates.}, } @article {pmid38216873, year = {2024}, author = {Yang, Z and Yang, X and Wang, M and Jia, R and Chen, S and Liu, M and Zhao, X and Yang, Q and Wu, Y and Zhang, S and Huang, J and Ou, X and Mao, S and Gao, Q and Sun, D and Tian, B and Zhu, D and Cheng, A}, title = {Genome-wide association study reveals serovar-associated genetic loci in Riemerella anatipestifer.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {57}, pmid = {38216873}, issn = {1471-2164}, abstract = {BACKGROUND: The disease caused by Riemerella anatipestifer (R. anatipestifer, RA) results in large economic losses to the global duck industry every year. Serovar-related genomic variation, such as the O-antigen and capsular polysaccharide (CPS) gene clusters, has been widely used for serotyping in many gram-negative bacteria. RA has been classified into at least 21 serovars based on slide agglutination, but the molecular basis of serotyping is unknown. In this study, we performed a pan-genome-wide association study (Pan-GWAS) to identify the genetic loci associated with RA serovars.

RESULTS: The results revealed a significant association between the putative CPS synthesis gene locus and the serological phenotype. Further characterization of the CPS gene clusters in 11 representative serovar strains indicated that they were highly diverse and serovar-specific. The CPS gene cluster contained the key genes wzx and wzy, which are involved in the Wzx/Wzy-dependent pathway of CPS synthesis. Similar CPS loci have been found in some other species within the family Weeksellaceae. We have also shown that deletion of the wzy gene in RA results in capsular defects and cross-agglutination.

CONCLUSIONS: This study indicates that the CPS synthesis gene cluster of R. anatipestifer is a serotype-specific genetic locus. Importantly, our finding provides a new perspective for the systematic analysis of the genetic basis of the R. anatipestifer serovars and a potential target for establishing a complete molecular serotyping scheme.}, } @article {pmid38216606, year = {2024}, author = {Schreiber, M and Wonneberger, R and Haaning, AM and Coulter, M and Russell, J and Himmelbach, A and Fiebig, A and Muehlbauer, GJ and Stein, N and Waugh, R}, title = {Genomic resources for a historical collection of cultivated two-row European spring barley genotypes.}, journal = {Scientific data}, volume = {11}, number = {1}, pages = {66}, pmid = {38216606}, issn = {2052-4463}, support = {BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; BB/S004610/1//RCUK | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; MU 3589/1-1//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; MU 3589/1-1//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; 1844331//National Science Foundation (NSF)/ ; 1844331//National Science Foundation (NSF)/ ; }, abstract = {Barley genomic resources are increasing rapidly, with the publication of a barley pangenome as one of the latest developments. Two-row spring barley cultivars are intensely studied as they are the source of high-quality grain for malting and distilling. Here we provide data from a European two-row spring barley population containing 209 different genotypes registered for the UK market between 1830 to 2014. 
The dataset encompasses RNA-sequencing data from six different tissues across a range of barley developmental stages, phenotypic datasets from two consecutive years of field-grown trials in the United Kingdom, Germany and the USA; and whole genome shotgun sequencing from all cultivars, which was used to complement the RNA-sequencing data for variant calling. The outcomes are a filtered SNP marker file, a phenotypic database and a large gene expression dataset providing a comprehensive resource which allows for downstream analyses like genome wide association studies or expression associations.}, } @article {pmid38214698, year = {2024}, author = {Park, S and Kim, I and Chhetri, G and Jung, Y and Woo, H and Seo, T}, title = {Cellulomonas alba sp. nov. and Cellulomonas edaphi sp. nov., isolated from wetland soils.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {74}, number = {1}, pages = {}, doi = {10.1099/ijsem.0.006235}, pmid = {38214698}, issn = {1466-5034}, abstract = {Two novel strains were isolated from wetland soils in Goyang, Republic of Korea. The two Gram-stain-positive, facultatively anaerobic, rod-shaped bacterial-type strains were designated MW4[T] and MW9[T]. Phylogenomic analysis based on whole-genome sequences suggested that both strains belonged to the genus Cellulomonas. The cells of strain MW4[T] were non-motile and grew at 20-40 °C (optimum, 35 °C), at pH 6.0-10.0 (optimum, pH 8.0) and in the presence of 0-1.0% NaCl (optimum, 0 %). The cells of strain MW9[T] were non-motile and grew at 20-40 °C (optimum, 35 °C), at pH 5.0-9.0 (optimum, pH 8.0) and in the presence of 0-1.0% NaCl (optimum, 0 %). The average nucleotide identity (77.1-88.1 %) and digital DNA-DNA hybridization values (21.0-34.8 %) between the two novel strains and with their closely related strains fell within the range for the genus Cellulomonas. 
The novel strains MW4[T] and MW9[T] and reference strains possessed alkane synthesis gene clusters (oleA, oleB, oleC and oleD). Phylogenomic, phylogenetic, average nucleotide identity, digital DNA-DNA hybridization, physiological and biochemical data indicated that the novel strains were distinct from other members of the family Cellulomonadaceae. We propose the names Cellulomonas alba sp. nov. (type strain MW4[T]=KACC 23260[T]=TBRC 17645[T]) and Cellulomonas edaphi sp. nov. (type strain MW9[T]=KACC 23261[T]=TBRC 17646[T]) for the two strains.}, } @article {pmid38213027, year = {2024}, author = {Ferrero-Serrano, Á and Chakravorty, D and Kirven, KJ and Assmann, SM}, title = {Oryza CLIMtools: A Genome-Environment Association Resource Reveals Adaptive Roles for Heterotrimeric G Proteins in the Regulation of Rice Agronomic Traits.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100813}, doi = {10.1016/j.xplc.2024.100813}, pmid = {38213027}, issn = {2590-3462}, abstract = {Modern crop varieties display a degree of mismatch between their current distributions and the suitability of the local climate for their productivity. To this end, we present Oryza CLIMtools (https://gramene.org/CLIMtools/oryza_v1.0/), the first resource for pan-genome prediction of climate-associated genetic variants in a crop species. Oryza CLIMtools consists of interactive web-based databases that allow the user to: i) explore the local environments of traditional rice varieties (landraces) in South-Eastern Asia, and; ii) investigate the environment by genome associations for 658 Indica and 283 Japonica rice landrace accessions collected from georeferenced local environments and included in the 3K Rice Genomes Project. We exemplify the value of these resources, identifying an interplay between flowering time and temperature in the local environment that is facilitated by adaptive natural variation in OsHD2 and disrupted by a natural variant in OsSOC1. 
Prior QTL analysis has suggested the importance of heterotrimeric G proteins in the control of agronomic traits. Accordingly, we analyzed the climate associations of natural variants in the different heterotrimeric G protein subunits. We identified a coordinated role of G proteins in adaptation to the prevailing Potential Evapotranspiration gradient and their regulation of key agronomic traits including plant height and seed and panicle length. We conclude by highlighting the prospect of targeting heterotrimeric G proteins to produce crops that are climate resilient.}, } @article {pmid38203838, year = {2024}, author = {Bin Hafeez, A and Pełka, K and Worobo, R and Szweda, P}, title = {In Silico Safety Assessment of Bacillus Isolated from Polish Bee Pollen and Bee Bread as Novel Probiotic Candidates.}, journal = {International journal of molecular sciences}, volume = {25}, number = {1}, pages = {}, doi = {10.3390/ijms25010666}, pmid = {38203838}, issn = {1422-0067}, abstract = {Bacillus species isolated from Polish bee pollen (BP) and bee bread (BB) were characterized for in silico probiotic and safety attributes. A probiogenomics approach was used, and in-depth genomic analysis was performed using a wide array of bioinformatics tools to investigate the presence of virulence and antibiotic resistance properties, mobile genetic elements, and secondary metabolites. Functional annotation and Carbohydrate-Active enZYmes (CAZYme) profiling revealed the presence of genes and a repertoire of probiotics properties promoting enzymes. The isolates BB10.1, BP20.15 (isolated from bee bread), and PY2.3 (isolated from bee pollen) genome mining revealed the presence of several genes encoding acid, heat, cold, and other stress tolerance mechanisms, adhesion proteins required to survive and colonize harsh gastrointestinal environments, enzymes involved in the metabolism of dietary molecules, antioxidant activity, and genes associated with the synthesis of vitamins. 
In addition, genes responsible for the production of biogenic amines (BAs) and D-/L-lactate, hemolytic activity, and other toxic compounds were also analyzed. Pan-genome analyses were performed with 180 Bacillus subtilis and 204 Bacillus velezensis genomes to mine for any novel genes present in the genomes of our isolates. Moreover, all three isolates also consisted of gene clusters encoding secondary metabolites.}, } @article {pmid38203357, year = {2023}, author = {Liu, K and Xu, H and Gao, X and Lu, Y and Wang, L and Ren, Z and Chen, C}, title = {Pan-Genome Analysis of TIFY Gene Family and Functional Analysis of CsTIFY Genes in Cucumber.}, journal = {International journal of molecular sciences}, volume = {25}, number = {1}, pages = {}, doi = {10.3390/ijms25010185}, pmid = {38203357}, issn = {1422-0067}, support = {32372703//the National Natural Science Foundation of China/ ; 32172605//the National Natural Science Foundation of China/ ; ZR2022MC084//the Shandong Natural Science Foundation/ ; }, abstract = {Cucumbers are frequently affected by gray mold pathogen Botrytis cinerea, a pathogen that causes inhibited growth and reduced yield. Jasmonic acid (JA) plays a primary role in plant responses to biotic stresses, and the jasmonate-ZIM-Domain (JAZ) proteins are key regulators of the JA signaling pathway. In this study, we used the pan-genome of twelve cucumber varieties to identify cucumber TIFY genes. Our findings revealed that two CsTIFY genes were present in all twelve cucumber varieties and showed no differences in protein sequence, gene structure, and motif composition. This suggests their evolutionary conservation across different cucumber varieties and implies that they may play a crucial role in cucumber growth. 
On the other hand, the other fourteen CsTIFY genes exhibited variations in protein sequence and gene structure or conserved motifs, which could be the result of divergent evolution, as these genes adapt to different cultivation and environmental conditions. Analysis of the expression profiles of the CsTIFY genes showed differential regulation by B. cinerea. Transient transfection plants overexpressing CsJAZ2, CsJAZ6, or CsZML2 were found to be more susceptible to B. cinerea infection compared to control plants. Furthermore, these plants infected by the pathogen showed lower levels of the enzymatic activities of POD, SOD and CAT. Importantly, after B. cinerea infection, the content of JA was upregulated in the plants, and cucumber cotyledons pretreated with exogenous MeJA displayed increased resistance to B. cinerea infection compared to those pretreated with water. Therefore, this study explored key TIFY genes in the regulation of cucumber growth and adaptability to different cultivation environments based on bioinformatics analysis and demonstrated that CsJAZs negatively regulate cucumber disease resistance to gray mold via multiple signaling pathways.}, } @article {pmid38200255, year = {2024}, author = {Sosinsky, A and Ambrose, J and Cross, W and Turnbull, C and Henderson, S and Jones, L and Hamblin, A and Arumugam, P and Chan, G and Chubb, D and Noyvert, B and Mitchell, J and Walker, S and Bowman, K and Pasko, D and Buongermino Pereira, M and Volkova, N and Rueda-Martin, A and Perez-Gil, D and Lopez, J and Pullinger, J and Siddiq, A and Zainy, T and Choudhury, T and Yavorska, O and Fowler, T and Bentley, D and Kingsley, C and Hing, S and Deans, Z and Rendon, A and Hill, S and Caulfield, M and Murugaesu, N}, title = {Insights for precision oncology from the integration of genomic and clinical data of 13,880 tumors from the 100,000 Genomes Cancer Programme.}, journal = {Nature medicine}, volume = {}, number = {}, pages = {}, pmid = {38200255}, issn = {1546-170X}, 
support = {C1298/A8362//Cancer Research UK (CRUK)/ ; C17422/A25154.//Cancer Research UK (CRUK)/ ; Barts Biomedical Research Centre//DH | National Institute for Health Research (NIHR)/ ; }, abstract = {The Cancer Programme of the 100,000 Genomes Project was an initiative to provide whole-genome sequencing (WGS) for patients with cancer, evaluating opportunities for precision cancer care within the UK National Healthcare System (NHS). Genomics England, alongside NHS England, analyzed WGS data from 13,880 solid tumors spanning 33 cancer types, integrating genomic data with real-world treatment and outcome data, within a secure Research Environment. Incidence of somatic mutations in genes recommended for standard-of-care testing varied across cancer types. For instance, in glioblastoma multiforme, small variants were present in 94% of cases and copy number aberrations in at least one gene in 58% of cases, while sarcoma demonstrated the highest occurrence of actionable structural variants (13%). Homologous recombination deficiency was identified in 40% of high-grade serous ovarian cancer cases with 30% linked to pathogenic germline variants, highlighting the value of combined somatic and germline analysis. The linkage of WGS and longitudinal life course clinical data allowed the assessment of treatment outcomes for patients stratified according to pangenomic markers. 
Our findings demonstrate the utility of linking genomic and real-world clinical data to enable survival analysis to identify cancer genes that affect prognosis and advance our understanding of how cancer genomics impacts patient outcomes.}, } @article {pmid38191433, year = {2024}, author = {Zhang, RY and Wang, YR and Liu, RL and Rhee, SK and Zhao, GP and Quan, ZX}, title = {Metagenomic characterization of a novel non-ammonia-oxidizing Thaumarchaeota from hadal sediment.}, journal = {Microbiome}, volume = {12}, number = {1}, pages = {7}, pmid = {38191433}, issn = {2049-2618}, support = {2021R1A2C3004015//National Research Foundation of Korea/ ; 2018YFC0310600//the National Key R&D Program of China/ ; 31870109, 31811540398//the National Natural Science Foundation of China (NSFC)/ ; }, abstract = {BACKGROUND: The hadal sediment, found at an ocean depth of more than 6000 m, is geographically isolated and under extremely high hydrostatic pressure, resulting in a unique ecosystem. Thaumarchaeota are ubiquitous marine microorganisms predominantly present in hadal environments. While there have been several studies on Thaumarchaeota there, most of them have primarily focused on ammonia-oxidizing archaea (AOA). However, systematic metagenomic research specifically targeting heterotrophic non-AOA Thaumarchaeota is lacking.

RESULTS: In this study, we explored the metagenomes of Challenger Deep hadal sediment, focusing on the Thaumarchaeota. Functional analysis of sequence reads revealed the potential contribution of Thaumarchaeota to recalcitrant dissolved organic matter degradation. Metagenome assembly binned one new group of hadal sediment-specific and ubiquitously distributed non-AOA Thaumarchaeota, named Group-3.unk. Pathway reconstruction of this new type of Thaumarchaeota also supports heterotrophic characteristics of Group-3.unk, along with ABC transporters for the uptake of amino acids and carbohydrates and catabolic utilization of these substrates. This new clade of Thaumarchaeota also contains aerobic oxidation of carbon monoxide-related genes. Complete glyoxylate cycle is a distinctive feature of this clade in supplying intermediates of anabolic pathways. The pan-genomic and metabolic analyses of metagenome-assembled genomes belonging to Group-3.unk Thaumarchaeota have highlighted distinctions, including the dihydroxy phthalate decarboxylase gene associated with the degradation of aromatic compounds and the absence of genes related to the synthesis of some types of vitamins compared to AOA. Notably, Group-3.unk shares a common feature with deep ocean AOA, characterized by their high hydrostatic pressure resistance, potentially associated with the presence of V-type ATP and di-myo-inositol phosphate syntheses-related genes. The enrichment of organic matter in hadal sediments might be attributed to the high recruitment of sequence reads of the Group-3.unk clade of heterotrophic Thaumarchaeota in the trench sediment. Evolutionary and genetic dynamic analyses suggest that Group-3 non-AOA consists of mesophilic Thaumarchaeota organisms. These results indicate a potential role in the transition from non-AOA to AOA Thaumarchaeota and from thermophilic to mesophilic Thaumarchaeota, shedding light on recent evolutionary pathways.

CONCLUSIONS: One novel clade of heterotrophic non-AOA Thaumarchaeota was identified through metagenome analysis of sediments from Challenger Deep. Our study provides insight into the ecology and genomic characteristics of the new sub-group of heterotrophic non-AOA Thaumarchaeota, thereby extending the knowledge of the evolution of Thaumarchaeota. Video Abstract.}, } @article {pmid38189173, year = {2024}, author = {Biderre-Petit, C and Courtine, D and Hennequin, C and Galand, PE and Bertilsson, S and Debroas, D and Monjot, A and Lepère, C and Divne, AM and Hochart, C}, title = {A pan-genomic approach reveals novel Sulfurimonas clade in the ferruginous meromictic Lake Pavin.}, journal = {Molecular ecology resources}, volume = {}, number = {}, pages = {e13923}, doi = {10.1111/1755-0998.13923}, pmid = {38189173}, issn = {1755-0998}, abstract = {The permanently anoxic waters in meromictic lakes create suitable niches for the growth of bacteria using sulphur metabolisms like sulphur oxidation. In Lake Pavin, the anoxic water mass hosts an active cryptic sulphur cycle that interacts narrowly with iron cycling, however the metabolisms of the microorganisms involved are poorly known. Here we combined metagenomics, single-cell genomics, and pan-genomics to further expand our understanding of the bacteria and the corresponding metabolisms involved in sulphur oxidation in this ferruginous sulphide- and sulphate-poor meromictic lake. We highlighted two new species within the genus Sulfurimonas that belong to a novel clade of chemotrophic sulphur oxidisers exclusive to freshwaters. 
We moreover conclude that this genus holds a key-role not only in limiting sulphide accumulation in the upper part of the anoxic layer but also constraining carbon, phosphate and iron cycling.}, } @article {pmid38188626, year = {2023}, author = {Karthik, K and Subramanian, S and Vinoli Priyadharshini, M and Jawahar, A and Anbazhagan, S and Kathiravan, RS and Thomas, P and Babu, RPA and Gopalan Tirumurugaan, K and Raj, GD}, title = {Whole genome sequencing and comparative genomics of Mycobacterium orygis isolated from different animal hosts to identify specific diagnostic markers.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1302393}, pmid = {38188626}, issn = {2235-2988}, abstract = {INTRODUCTION: Mycobacterium orygis, a member of MTBC has been identified in higher numbers in the recent years from animals of South Asia. Comparative genomics of this important zoonotic pathogen is not available which can provide data on the molecular difference between other MTBC members. Hence, the present study was carried out to isolate, whole genome sequence M. orygis from different animal species (cattle, buffalo and deer) and to identify molecular marker for the differentiation of M. orygis from other MTBC members.

METHODS: Isolation and whole genome sequencing of M. orygis was carried out for 9 samples (4 cattle, 4 deer and 1 buffalo) died due to tuberculosis. Comparative genomics employing 53 genomes (44 from database and 9 newly sequenced) was performed to identify SNPs, spoligotype, pangenome structure, and region of difference.

RESULTS: M. orygis was isolated from water buffalo and sambar deer which is the first of its kind report worldwide. Comparative pangenomics of all M. orygis strains worldwide (n= 53) showed a closed pangenome structure which is also reported for the first time. Pairwise SNP between TANUVAS_2, TANUVAS_4, TANUVAS_5, TANUVAS_7 and NIRTAH144 was less than 15 indicating that the same M. orygis strain may be the cause for infection. Region of difference prediction showed absence of RD7, RD8, RD9, RD10, RD12, RD301, RD315 in all the M. orygis analyzed. SNPs in virulence gene, PE35 was found to be unique to M. orygis which can be used as marker for identification.

CONCLUSION: The present study is yet another supportive evidence that M. orygis is more prevalent among animals in South Asia and the zoonotic potential of this organism needs to be evaluated.}, } @article {pmid38187556, year = {2023}, author = {Oles, RE and Terrazas, MC and Loomis, LR and Hsu, CY and Tribelhorn, C and Ferre, PB and Ea, A and Bryant, M and Young, J and Carrow, HC and Sandborn, WJ and Dulai, P and Sivagnanam, M and Pride, D and Knight, R and Chu, H}, title = {Pangenome comparison of Bacteroides fragilis genomospecies unveil genetic diversity and ecological insights.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.12.20.572674}, pmid = {38187556}, abstract = {Bacteroides fragilis is a Gram-negative commensal bacterium commonly found in the human colon that differentiates into two genomospecies termed division I and II. We leverage a comprehensive collection of 694 B. fragilis whole genome sequences and report differential gene abundance to further support the recent proposal that divisions I and II represent separate species. In division I strains, we identify an increased abundance of genes related to complex carbohydrate degradation, colonization, and host niche occupancy, confirming the role of division I strains as gut commensals. In contrast, division II strains display an increased prevalence of plant cell wall degradation genes and exhibit a distinct geographic distribution, primarily originating from Asian countries, suggesting dietary influences. Notably, division II strains have an increased abundance of genes linked to virulence, survival in toxic conditions, and antimicrobial resistance, consistent with a higher incidence of these strains in bloodstream infections. This study provides new evidence supporting a recent proposal for classifying divisions I and II B. 
fragilis strains as distinct species, and our comparative genomic analysis reveals their niche-specific roles.}, } @article {pmid38183874, year = {2023}, author = {Yu, K and Huang, Z and Xiao, Y and Gao, H and Bai, X and Wang, D}, title = {Global spread characteristics of CTX-M-type extended-spectrum β-lactamases: A genomic epidemiology analysis.}, journal = {Drug resistance updates : reviews and commentaries in antimicrobial and anticancer chemotherapy}, volume = {73}, number = {}, pages = {101036}, doi = {10.1016/j.drup.2023.101036}, pmid = {38183874}, issn = {1532-2084}, abstract = {BACKGROUND: Extended-spectrum β-lactamases (ESBLs) producing bacteria have spread worldwide and become a global public health concern. Plasmid-mediated transfer of ESBLs is an important route for resistance acquisition.

METHODS: We collected 1345 complete sequences of plasmids containing CTX-Ms from public database. The global transmission pattern of plasmids and evolutionary dynamics of CTX-Ms have been inferred. We applied the pan-genome clustering based on plasmid genomes and evolution analysis to demonstrate the transmission events.

FINDINGS: Totally, 48 CTX-Ms genotypes and 186 incompatible types of plasmids were identified. The geographical distribution of CTX-Ms showed significant differences across countries and continents. CTX-M-14 and CTX-M-55 were found to be the dominant genotypes in Asia, while CTX-M-1 played a leading role in Europe. The plasmids can be divided into 12 lineages, some of which forming distinct geographical clusters in Asia and Europe, while others forming hybrid populations. The Inc types of plasmids are lineage-specific, with the CTX-M-1_IncI1-I (Alpha) and CTX-M-65_IncFII (pHN7A8)/R being the dominant patterns of cross-host and cross-regional transmission. The IncI-I (Alpha) plasmids with the highest number, were presumed to form communication groups in Europe-Asia and Asia-America-Oceania, showing the transmission model as global dissemination and regional microevolution. Meanwhile, the main kinetic elements of blaCTX-Ms showed genotypic preferences. ISEcpl and IS26 were most frequently involved in the transfer of CTX-M-14 and CTX-M-65, respectively. IS15 has become a crucial participant in mediating the dissemination of blaCTX-Ms. Interestingly, blaTEM and blaCTX-Ms often coexisted in the same transposable unit. Furthermore, antibiotic resistance genes associated with aminoglycosides, sulfonamides and cephalosporins showed a relatively high frequency of synergistic effects with CTX-Ms.

CONCLUSIONS: We recognized the dominant blaCTX-Ms and mainstream plasmids of different continents. The results of this study provide support for a more effective response to the risks associated with the evolution of blaCTX-Ms-bearing plasmids, and lay the foundation for genotype-specific epidemiological surveillance of resistance, which are of important public health implications.}, } @article {pmid38181886, year = {2024}, author = {Verma, N and Sharma, T and Bhardwaj, A and Ramana, VV}, title = {Comparative genomics and characterization of a multidrug-resistant Acinetobacter baumannii VRL-M19 isolated from a crowded setting in India.}, journal = {Infection, genetics and evolution : journal of molecular epidemiology and evolutionary genetics in infectious diseases}, volume = {}, number = {}, pages = {105549}, doi = {10.1016/j.meegid.2023.105549}, pmid = {38181886}, issn = {1567-7257}, abstract = {A crowded vegetable market serves as a mass gathering, posing a potential risk for infection transmission. In this study, we isolated a multidrug-resistant Acinetobacter baumannii strain, VRL-M19, from the air of such a market and conducted comparative genomics and phenotypic characterization. Antimicrobial susceptibility testing, genome sequencing using Illumina HiSeq X10, and pan-genome analysis with 788 clinical isolates identified core, accessory, and unique drug-resistant determinants. Mutational analysis of drug-resistance genes, virulence factor annotation, in vitro pathogenicity assessment, subsystem analysis, Multilocus sequence typing, and whole genome phylogenetic analysis were performed. VRL-M19 exhibited multidrug resistance with 69 determinants, and analysis across 788 clinical isolates and 350 Indian isolates revealed more accessory genes (52 out of 69) in the Indian isolates. Multiple mutations were observed in drug target modification genes, and the strain was identified as a moderate biofilm-former with 55 virulence factors. 
Whole genome phylogenetics indicated a close relationship between VRL-M19 and clinical A. baumannii strains. In conclusion, our comprehensive study suggests that VRL-M19 is a multidrug-resistant, potential pathogen with biofilm-forming capabilities, closely associated with clinical A. baumannii strains.}, } @article {pmid38177691, year = {2024}, author = {Domingo-Sananes, MR and Meehan, CJ}, title = {The population genetics of prokaryotic pangenomes.}, journal = {Nature ecology \& evolution}, volume = {}, number = {}, pages = {}, pmid = {38177691}, issn = {2397-334X}, } @article {pmid38177690, year = {2024}, author = {Douglas, GM and Shapiro, BJ}, title = {Pseudogenes act as a neutral reference for detecting selection in prokaryotic pangenomes.}, journal = {Nature ecology \& evolution}, volume = {}, number = {}, pages = {}, pmid = {38177690}, issn = {2397-334X}, abstract = {A long-standing question is to what degree genetic drift and selection drive the divergence in rare accessory gene content between closely related bacteria. Rare genes, including singletons, make up a large proportion of pangenomes (all genes in a set of genomes), but it remains unclear how many such genes are adaptive, deleterious or neutral to their host genome. Estimates of species' effective population sizes (Ne) are positively associated with pangenome size and fluidity, which has independently been interpreted as evidence for both neutral and adaptive pangenome models. We hypothesized that pseudogenes, used as a neutral reference, could be used to distinguish these models. We find that most functional categories are depleted for rare pseudogenes when a genome encodes only a single intact copy of a gene family. In contrast, transposons are enriched in pseudogenes, suggesting they are mostly neutral or deleterious to the host genome. 
Thus, even if individual rare accessory genes vary in their effects on host fitness, we can confidently reject a model of entirely neutral or deleterious rare genes. We also define the ratio of singleton intact genes to singleton pseudogenes (si/sp) within a pangenome, compare this measure across 668 prokaryotic species and detect a signal consistent with the adaptive value of many rare accessory genes. Taken together, our work demonstrates that comparing with pseudogenes can improve inferences of the evolutionary forces driving pangenome variation.}, } @article {pmid38173673, year = {2023}, author = {Sarr, M and Alou, MT and Padane, A and Diouf, FS and Beye, M and Sokhna, C and Fenollar, F and Mboup, S and Raoult, D and Million, M}, title = {A review of the literature of Listeria monocytogenes in Africa highlights breast milk as an overlooked human source.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1213953}, pmid = {38173673}, issn = {1664-302X}, abstract = {According to the latest WHO estimates (2015) of the global burden of foodborne diseases, Listeria monocytogenes is responsible for one of the most serious foodborne infections and commonly results in severe clinical outcomes. The 2013 French MONALISA prospective cohort identified that women born in Africa has a 3-fold increase in the risk of maternal neonatal listeriosis. One of the largest L. monocytogenes outbreaks occurred in South Africa in 2017-2018 with over 1,000 cases. Moreover, recent findings identified L. monocytogenes in human breast milk in Mali and Senegal with its relative abundance positively correlated with severe acute malnutrition. These observations suggest that the carriage of L. monocytogenes in Africa should be further explored, starting with the existing literature. For that purpose, we searched the peer-reviewed and grey literature published dating back to 1926 to date using six databases. Ultimately, 225 articles were included in this review. 
We highlighted that L. monocytogenes is detected in various sample types including environmental samples, food samples as well as animal and human samples. These studies were mostly conducted in five east African countries, four west African countries, four north African countries, and two Southern African countries. Moreover, only ≈ 0.2% of the Listeria monocytogenes genomes available on NCBI were obtained from African samples, contracted with its detection. The pangenome resulting from the African Listeria monocytogenes samples revealed three clusters including two from South-African strains as well as one consisting of the strains isolated from breast milk in Mali and Senegal and, a vaginal post-miscarriage sample. This suggests there was a clonal complex circulating in Mali and Senegal. As this clone has not been associated to infections, further studies should be conducted to confirm its circulation in the region and explore its association with foodborne infections. Moreover, it is apparent that more resources should be allocated to the detection of L. monocytogenes as only 15/54 countries have reported its detection in the literature. It seems paramount to map the presence and carriage of L. 
monocytogenes in all African countries to prevent listeriosis outbreaks and the related miscarriages and confirm its association with severe acute malnutrition.}, } @article {pmid38172677, year = {2024}, author = {Choi, DG and Baek, JH and Han, DM and Khan, SA and Jeon, CO}, title = {Comparative pangenome analysis of Enterococcus faecium and Enterococcus lactis provides new insights into the adaptive evolution by horizontal gene acquisitions.}, journal = {BMC genomics}, volume = {25}, number = {1}, pages = {28}, pmid = {38172677}, issn = {1471-2164}, support = {Graduate Research Scholarship in 2018//Chung-Ang University/ ; PJ01710102//Rural Development Administration/ ; 2018R1A5A1025077//Ministry of Science and ICT, South Korea/ ; }, abstract = {BACKGROUND: Enterococcus faecium and E. lactis are phylogenetically closely related lactic acid bacteria that are ubiquitous in nature and are known to be beneficial or pathogenic. Despite their considerable industrial and clinical importance, comprehensive studies on their evolutionary relationships and genomic, metabolic, and pathogenic traits are still lacking. Therefore, we conducted comparative pangenome analyses using all available dereplicated genomes of these species.

RESULTS: E. faecium was divided into two subclades: subclade I, comprising strains derived from humans, animals, and food, and the more recent phylogenetic subclade II, consisting exclusively of human-derived strains. In contrast, E. lactis strains, isolated from diverse sources including foods, humans, animals, and the environment, did not display distinct clustering based on their isolation sources. Despite having similar metabolic features, noticeable genomic differences were observed between E. faecium subclades I and II, as well as E. lactis. Notably, E. faecium subclade II strains exhibited significantly larger genome sizes and higher gene counts compared to both E. faecium subclade I and E. lactis strains. Furthermore, they carried a higher abundance of antibiotic resistance, virulence, bacteriocin, and mobile element genes. Phylogenetic analysis of antibiotic resistance and virulence genes suggests that E. faecium subclade II strains likely acquired these genes through horizontal gene transfer, facilitating their effective adaptation in response to antibiotic use in humans.

CONCLUSIONS: Our study offers valuable insights into the adaptive evolution of E. faecium strains, enabling their survival as pathogens in the human environment through horizontal gene acquisitions.}, } @article {pmid38170317, year = {2024}, author = {Lin, J and Xiao, Y and Liu, H and Gao, D and Duan, Y and Zhu, X}, title = {Combined transcriptomic and pangenomic analyses guide metabolic amelioration to enhance tiancimycins production.}, journal = {Applied microbiology and biotechnology}, volume = {108}, number = {1}, pages = {1--11}, pmid = {38170317}, issn = {1432-0614}, support = {2020zzts248//Fundamental Research Funds for Central Universities of the Central South University/ ; 81530092//National Natural Science Foundation of China/ ; B0803420//Chinese Ministry of Education 111/ ; }, abstract = {Exploration of high-yield mechanism is important for further titer improvement of valuable antibiotics, but how to achieve this goal is challenging. Tiancimycins (TNMs) are anthraquinone-fused enediynes with promising drug development potentials, but their prospective applications are limited by low titers. This work aimed to explore the intrinsic high-yield mechanism in previously obtained TNMs high-producing strain Streptomyces sp. CB03234-S for the further titer amelioration of TNMs. First, the typical ribosomal RpsL(K43N) mutation in CB03234-S was validated to be merely responsible for the streptomycin resistance but not the titer improvement of TNMs. Subsequently, the combined transcriptomic, pan-genomic and KEGG analyses revealed that the significant changes in the carbon and amino acid metabolisms could reinforce the metabolic fluxes of key CoA precursors, and thus prompted the overproduction of TNMs in CB03234-S. Moreover, fatty acid metabolism was considered to exert adverse effects on the biosynthesis of TNMs by shunting and reducing the accumulation of CoA precursors. 
Therefore, different combinations of relevant genes were respectively overexpressed in CB03234-S to strengthen fatty acid degradation. The resulting mutants all showed the enhanced production of TNMs. Among them, the overexpression of fadD, a key gene responsible for the first step of fatty acid degradation, achieved the highest 21.7 ± 1.1 mg/L TNMs with a 63.2% titer improvement. Our studies suggested that comprehensive bioinformatic analyses are effective to explore metabolic changes and guide rational metabolic reconstitution for further titer improvement of target products. KEY POINTS: • Comprehensive bioinformatic analyses effectively reveal primary metabolic changes. • Primary metabolic changes cause precursor enrichment to enhance TNMs production. • Strengthening of fatty acid degradation further improves the titer of TNMs.}, } @article {pmid38168881, year = {2024}, author = {Triesch, S and Denton, AK and Bouvier, JW and Buchmann, JP and Reichel-Deland, V and Guerreiro, RNFM and Busch, N and Schlüter, U and Stich, B and Kelly, S and Weber, APM}, title = {Transposable elements contribute to the establishment of the glycine shuttle in Brassicaceae species.}, journal = {Plant biology (Stuttgart, Germany)}, volume = {}, number = {}, pages = {}, doi = {10.1111/plb.13601}, pmid = {38168881}, issn = {1438-8677}, support = {391465903/GRK 2466//Deutsche Forschungsgemeinschaft (German Research Foundation)/ ; 390686111//Germany's Excellence Strategy EXC-2048/1/ ; WE 2231/20-1//ERA-CAPS (European Research Network for Coordinating Action in Plant Sciences)/ ; 456082119//CRC (Collaborative Research Center)/ ; BB/J014427/1//BBSRC/ ; //Royal Society University Research Fellowship/ ; }, abstract = {C3 -C4 intermediate photosynthesis has evolved at least five times convergently in the Brassicaceae, despite this family lacking bona fide C4 species. 
The establishment of this carbon concentrating mechanism is known to require a complex suite of ultrastructural modifications, as well as changes in spatial expression patterns, which are both thought to be underpinned by a reconfiguration of existing gene-regulatory networks. However, to date, the mechanisms which underpin the reconfiguration of these gene networks are largely unknown. In this study, we used a pan-genomic association approach to identify genomic features that could confer differential gene expression towards the C3 -C4 intermediate state by analysing eight C3 species and seven C3 -C4 species from five independent origins in the Brassicaceae. We found a strong correlation between transposable element (TE) insertions in cis-regulatory regions and C3 -C4 intermediacy. Specifically, our study revealed 113 gene models in which the presence of a TE within a gene correlates with C3 -C4 intermediate photosynthesis. In this set, genes involved in the photorespiratory glycine shuttle are enriched, including the glycine decarboxylase P-protein whose expression domain undergoes a spatial shift during the transition to C3 -C4 photosynthesis. When further interrogating this gene, we discovered independent TE insertions in its upstream region which we conclude to be responsible for causing the spatial shift in GLDP1 gene expression. 
Our findings hint at a pivotal role of TEs in the evolution of C3 -C4 intermediacy, especially in mediating differential spatial gene expression.}, } @article {pmid38168637, year = {2024}, author = {Guo, N and Wang, S and Wang, T and Duan, M and Zong, M and Miao, L and Han, S and Wang, G and Liu, X and Zhang, D and Jiao, C and Xu, H and Chen, L and Fei, Z and Li, J and Liu, F}, title = {Graph-based Pan-genome of Brassica oleracea Provides New Insights into Its Domestication and Morphotype Diversification.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100791}, doi = {10.1016/j.xplc.2023.100791}, pmid = {38168637}, issn = {2590-3462}, abstract = {The domestication of Brassica oleracea has resulted in diverse morphological types with distinct patterns of organ development. Here we report a graph-based pan-genome of B. oleracea constructed with high-quality genome assemblies of different morphotypes. The pan-genome harbors over 200 structural variant (SV) hotspot regions enriched with auxin and flowering-related genes. Population genomic analyses reveal that early domestication of B. oleracea focused on leaf or stem development. Gene flows resulting from agricultural practices and variety improvement are detected among different morphotypes. Selective sweep and pan-genome analyses identify an auxin-responsive SAUR gene and a CLE family gene as crucial players in the leaf-stem differentiation during the early stage of B. oleracea domestication, and the BoKAN1 gene as instrumental in shaping the leafy heads of cabbage and Brussels sprouts. Our pan-genome and functional analyses further discover that variations in the BoFLC2 gene play key roles in the divergence of vernalization and flowering characteristics among different morphotypes, and variations in the first intron of BoFLC3 are involved in fine-tuning the flowering process in cauliflower. This study provides a comprehensive understanding of the pan-genome of B. 
oleracea and sheds light on the domestication and differential organ development of this globally important crop species.}, } @article {pmid38168361, year = {2023}, author = {Sirén, J and Eskandar, P and Ungaro, MT and Hickey, G and Eizenga, JM and Novak, AM and Chang, X and Chang, PC and Kolmogorov, M and Carroll, A and Monlong, J and Paten, B}, title = {Personalized Pangenome References.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.12.13.571553}, pmid = {38168361}, abstract = {Pangenomes, by including genetic diversity, should reduce reference bias by better representing new samples compared to them. Yet when comparing a new sample to a pangenome, variants in the pangenome that are not part of the sample can be misleading, for example, causing false read mappings. These irrelevant variants are generally rarer in terms of allele frequency, and have previously been dealt with using allele frequency filters. However, this is a blunt heuristic that both fails to remove some irrelevant variants and removes many relevant variants. We propose a new approach, inspired by local ancestry inference methods, that imputes a personalized pangenome subgraph based on sampling local haplotypes according to k -mer counts in the reads. Our approach is tailored for the Giraffe short read aligner, as the indexes it needs for read mapping can be built quickly. We compare the accuracy of our approach to state-of-the-art methods using graphs from the Human Pangenome Reference Consortium. 
The resulting personalized pangenome pipelines provide faster pangenome read mapping than comparable pipelines that use a linear reference, reduce small variant genotyping errors by 4x relative to the Genome Analysis Toolkit (GATK) best-practice pipeline, and for the first time make short-read structural variant genotyping competitive with long-read discovery methods.}, } @article {pmid38168234, year = {2023}, author = {Qiu, X and McGee, L and Hammitt, LL and Grant, LR and O'Brien, KL and Hanage, WP and Lipsitch, M}, title = {Prediction of post-PCV13 pneumococcal evolution using invasive disease data enhanced by inverse-invasiveness weighting.}, journal = {medRxiv : the preprint server for health sciences}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.12.10.23299786}, pmid = {38168234}, abstract = {BACKGROUND: After introduction of pneumococcal conjugate vaccines (PCVs), serotype replacement occurred in the population of Streptococcus pneumoniae. Predicting which pneumococcal clones and serotypes will become more common in carriage after vaccination can enhance vaccine design and public health interventions, while also improving our understanding of pneumococcal evolution. We sought to use invasive disease data to assess how well negative frequency-dependent selection (NFDS) models could explain pneumococcal carriage population evolution in the post-PCV13 epoch by weighting invasive data to approximate strain proportions in the carriage population.

METHODS: Invasive pneumococcal isolates were collected and sequenced during 1998-2018 by the Active Bacterial Core surveillance (ABCs) from the Centers for Disease Control and Prevention (CDC). To predict the post-PCV13 population dynamics in the carriage population using a NFDS model, all genomic data were processed under a bioinformatic pipeline of assembly, annotation, and pangenome analysis to define genetically similar sequence clusters (i.e., strains) and a set of accessory genes present in 5% to 95% of the isolates. The NFDS model predicted the strain proportion by calculating the post-vaccine strain composition in the weighted invasive disease population that would best match pre-vaccine accessory gene frequencies. To overcome the biases of invasive disease data, serotype-specific inverse-invasiveness weights were defined as the ratio of the proportion of the serotype in the carriage data to the proportion in the invasive data, using data from 1998-2001 in the United States, before conjugate vaccine introduction. The weights were applied to adjust both the observed strain proportion and the accessory gene frequencies.

RESULTS: Inverse-invasiveness weighting increased the correlation of accessory gene frequencies between invasive and carriage data with reduced residuals in linear or logit scale for pre-vaccine, post-PCV7, and post-PCV13. Similarly, weighting increased the correlation of accessory gene frequencies between different time periods in the invasive data. By weighting the invasive data, we were able to use the NFDS model to predict strain proportions in the carriage population in the post-PCV13 epoch, with the adjusted R-squared between predicted and observed strain proportions increasing from 0.176 to 0.544 after weighting.

CONCLUSIONS: The weighting system adjusted the invasive disease surveillance data to better represent the carriage population of S. pneumoniae. The NFDS mechanism predicted the strain proportions in the projected carriage population as estimated from the weighted invasive disease frequencies in the post-PCV13 epoch. Our methods enrich the value of genomic sequences from invasive disease surveillance, which is readily available, easy to collect, and of direct interest to public health.

IMPORTANCE: Streptococcus pneumoniae, a common colonizer in the human nasopharynx, can cause invasive diseases including pneumonia, bacteremia, and meningitis mostly in children under 5 years or older adults. The PCV7 was introduced in 2000 in the United States within the pediatric population to prevent disease and reduce deaths, followed by PCV13 in 2010, PCV15 in 2022, and PCV20 in 2023. After the removal of vaccine serotypes, the prevalence of carriage remained stable as the vacated pediatric ecological niche was filled with certain non-vaccine serotypes. Predicting which pneumococcal clones, and which serotypes, will be most successful in colonization after vaccination can enhance vaccine design and public health interventions, while also improving our understanding of pneumococcal evolution. While carriage data, which are collected from the pneumococcal population that is competing to colonize and transmit, are most directly relevant to evolutionary studies, invasive disease data are often more plentiful. Previously, evolutionary models based on negative frequency-dependent selection (NFDS) on the accessory genome were shown to predict which non-vaccine strains and serotypes were most successful in colonization following the introduction of PCV7. Here, we show that an inverse-invasiveness weighting system applied to invasive disease surveillance data allows the NFDS model to predict strain proportions in the projected carriage population in the post-PCV13/pre-PCV15 and -PCV20 epoch. The significance of our research lies in using a sample of invasive disease surveillance data to extend the use of NFDS as an evolutionary mechanism to predict post-PCV13 population dynamics. 
This has shown that we can correct for biased sampling that arises from differences in virulence and can enrich the value of genomic data from disease surveillance and advances our understanding of how NFDS impacts carriage population dynamics after both PCV7 and PCV13 vaccination.}, } @article {pmid38163518, year = {2023}, author = {Abondio, P and Bruno, F and Passarino, G and Montesanto, A and Luiselli, D}, title = {Pangenomics: a new era in the field of neurodegenerative diseases.}, journal = {Ageing research reviews}, volume = {}, number = {}, pages = {102180}, doi = {10.1016/j.arr.2023.102180}, pmid = {38163518}, issn = {1872-9649}, abstract = {A pangenome is composed of all the genetic variability of a group of individuals, and its application to the study of neurodegenerative diseases may provide valuable insights into the underlying aspects of genetic heterogeneity for these complex ailments, including gene expression, epigenetics, and translation mechanisms. Furthermore, a reference pangenome allows for the identification of previously undetected structural commonalities and differences among individuals, which may help in the diagnosis of a disease, support the prediction of what will happen over time (prognosis) and aid in developing novel treatments in the perspective of personalized medicine. 
Therefore, in the present review, the application of the pangenome concept to the study of neurodegenerative diseases will be discussed and analyzed for its potential to enable an improvement in diagnosis and prognosis for these illnesses, leading to the development of tailored treatments for individual patients from the knowledge of the genomic composition of a whole population.}, } @article {pmid38158885, year = {2023}, author = {Lv, Y and Liu, C and Li, X and Wang, Y and He, H and He, W and Chen, W and Yang, L and Dai, X and Cao, X and Yu, X and Liu, J and Zhang, B and Wei, H and Zhang, H and Qian, H and Shi, C and Leng, Y and Liu, X and Guo, M and Wang, X and Zhang, Z and Wang, T and Zhang, B and Xu, Q and Cui, Y and Zhang, Q and Yuan, Q and Jahan, N and Ma, J and Zheng, X and Zhou, Y and Qian, Q and Guo, L and Shang, L}, title = {A centromere map based on super pan-genome highlights the structure and function of rice centromeres.}, journal = {Journal of integrative plant biology}, volume = {}, number = {}, pages = {}, doi = {10.1111/jipb.13607}, pmid = {38158885}, issn = {1744-7909}, abstract = {Rice (Oryza sativa) is a significant crop worldwide with a genome shaped by various evolutionary factors. Rice centromeres are crucial for chromosome segregation, and contain some unreported genes. Due to the diverse and complex centromere region, a comprehensive understanding of rice centromere structure and function at the population level is needed. We constructed a high-quality centromere map based on the rice super pan-genome consisting of a 251-accession panel comprising both cultivated and wild species of Asian and African rice. We showed that rice centromeres have diverse satellite repeat CentO, which vary across chromosomes and subpopulations, reflecting their distinct evolutionary patterns. 
We also revealed that long terminal repeats (LTRs), especially young Gypsy-type LTRs, are abundant in the peripheral CentO-enriched regions (CoERs) and drive rice centromere expansion and evolution. Furthermore, high-quality genome assembly and complete T2T reference genome enable us to obtain more centromeric genome information despite the mapping and cloning of centromere genes is challenging. We investigated the association between structural variations (SVs) and gene expression in the rice centromere. A centromere gene, OsMAB, that positively regulates rice tiller number, was further confirmed by eQTL, haplotype analysis and CRISPR/Cas9 methods. By revealing the new insights into the evolutionary patterns and biological roles of rice centromeres, our finding will facilitate future research on centromere biology and crop improvement. This article is protected by copyright. All rights reserved.}, } @article {pmid38157192, year = {2023}, author = {Yu, Y and Chen, H}, title = {Human pangenome: far-reaching implications in precision medicine.}, journal = {Frontiers of medicine}, volume = {}, number = {}, pages = {}, pmid = {38157192}, issn = {2095-0225}, } @article {pmid38147560, year = {2024}, author = {Beavan, A and Domingo-Sananes, MR and McInerney, JO}, title = {Contingency, repeatability, and predictability in the evolution of a prokaryotic pangenome.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {121}, number = {1}, pages = {e2304934120}, doi = {10.1073/pnas.2304934120}, pmid = {38147560}, issn = {1091-6490}, support = {BB/Y513374/1//UKRI | Biotechnology and Biological Sciences Research Council (BBSRC)/ ; }, abstract = {Pangenomes exhibit remarkable variability in many prokaryotic species, much of which is maintained through the processes of horizontal gene transfer and gene loss. 
Repeated acquisitions of near-identical homologs can easily be observed across pangenomes, leading to the question of whether these parallel events potentiate similar evolutionary trajectories, or whether the remarkably different genetic backgrounds of the recipients mean that postacquisition evolutionary trajectories end up being quite different. In this study, we present a machine learning method that predicts the presence or absence of genes in the Escherichia coli pangenome based on complex patterns of the presence or absence of other accessory genes within a genome. Our analysis leverages the repeated transfer of genes through the E. coli pangenome to observe patterns of repeated evolution following similar events. We find that the presence or absence of a substantial set of genes is highly predictable from other genes alone, indicating that selection potentiates and maintains gene-gene co-occurrence and avoidance relationships deterministically over long-term bacterial evolution and is robust to differences in host evolutionary history. We propose that at least part of the pangenome can be understood as a set of genes with relationships that govern their likely cohabitants, analogous to an ecosystem's set of interacting organisms. Our findings indicate that intragenomic gene fitness effects may be key drivers of prokaryotic evolution, influencing the repeated emergence of complex gene-gene relationships across the pangenome.}, } @article {pmid38145107, year = {2023}, author = {Dabbaghie, F and Srikakulam, SK and Marschall, T and Kalinina, OV}, title = {PanPA: generation and alignment of panproteome graphs.}, journal = {Bioinformatics advances}, volume = {3}, number = {1}, pages = {vbad167}, pmid = {38145107}, issn = {2635-0041}, abstract = {MOTIVATION: Compared to eukaryotes, prokaryote genomes are more diverse through different mechanisms, including a higher mutation rate and horizontal gene transfer. 
Therefore, using a linear representative reference can cause a reference bias. Graph-based pangenome methods have been developed to tackle this problem. However, comparisons in DNA space are still challenging due to this high diversity. In contrast, amino acid sequences have higher similarity due to evolutionary constraints, whereby a single amino acid may be encoded by several synonymous codons. Coding regions cover the majority of the genome in prokaryotes. Thus, panproteomes present an attractive alternative leveraging the higher sequence similarity while not losing much of the genome in non-coding regions.

RESULTS: We present PanPA, a method that takes a set of multiple sequence alignments of protein sequences, indexes them, and builds a graph for each multiple sequence alignment. In the querying step, it can align DNA or amino acid sequences back to these graphs. We first showcase that PanPA generates correct alignments on a panproteome from 1350 Escherichia coli. To demonstrate that panproteomes allow comparisons at longer phylogenetic distances, we compare DNA and protein alignments from 1073 Salmonella enterica assemblies against the E. coli reference genome, pangenome, and panproteome using BWA, GraphAligner, and PanPA, respectively; with PanPA aligning around 22% more sequences. We also aligned a DNA short-reads whole genome sequencing (WGS) sample from S. enterica against the E. coli reference with BWA and the panproteome with PanPA, where PanPA was able to find alignments for 68% of the reads compared to 5% with BWA.

PanPA is available at https://github.com/fawaz-dabbaghieh/PanPA.}, } @article {pmid38139397, year = {2023}, author = {Yin, S and Zhao, L and Liu, J and Sun, Y and Li, B and Wang, L and Ren, Z and Chen, C}, title = {Pan-genome Analysis of WOX Gene Family and Function Exploration of CsWOX9 in Cucumber.}, journal = {International journal of molecular sciences}, volume = {24}, number = {24}, pages = {}, doi = {10.3390/ijms242417568}, pmid = {38139397}, issn = {1422-0067}, support = {ZR2022MC084//Shandong Natural Science Foundation/ ; 31701923//National Natural Science Foundation of China/ ; 32372703//National Natural Science Foundation of China/ ; 32172605//National Natural Science Foundation of China/ ; }, abstract = {Cucumber is an economically important vegetable crop, and the warts (composed of spines and Tubercules) of cucumber fruit are an important quality trait that influences its commercial value. WOX transcription factors are known to have pivotal roles in regulating various aspects of plant growth and development, but their studies in cucumber are limited. Here, genome-wide identification of cucumber WOX genes was performed using the pan-genome analysis of 12 cucumber varieties. Our findings revealed diverse CsWOX genes in different cucumber varieties, with variations observed in protein sequences and lengths, gene structure, and conserved protein domains, possibly resulting from the divergent evolution of CsWOX genes as they adapt to diverse cultivation and environmental conditions. Expression profiles of the CsWOX genes demonstrated that CsWOX9 was significantly expressed in unexpanded ovaries, especially in the epidermis. Additionally, analysis of the CsWOX9 promoter revealed two binding sites for the C2H2 zinc finger protein. 
We successfully executed a yeast one-hybrid assay (Y1H) and a dual-luciferase (LUC) transaction assay to demonstrate that CsWOX9 can be transcriptionally activated by the C2H2 zinc finger protein Tu, which is crucial for fruit Tubercule formation in cucumber. Overall, our results indicated that CsWOX9 is a key component of the molecular network that regulates wart formation in cucumber fruits, and provide further insight into the function of CsWOX genes in cucumber.}, } @article {pmid38138105, year = {2023}, author = {Zhang, Y and Pan, M and Wang, Q and Wang, L and Liao, L}, title = {Complete Genome Sequence and Pan-Genome Analysis of Shewanella oncorhynchi Z-P2, a Siderophore Putrebactin-Producing Bacterium.}, journal = {Microorganisms}, volume = {11}, number = {12}, pages = {}, doi = {10.3390/microorganisms11122961}, pmid = {38138105}, issn = {2076-2607}, support = {2022BEC030//Technological innovation Program of Hubei Province(Major project)/ ; }, abstract = {In this study, we reported the complete genome sequence of Shewanella oncorhynchi for the first time. S. oncorhynchi Z-P2 is a bacterium that produces the siderophore putrebactin. Its genome consists of a circular chromosome of 5,034,612 bp with a G + C content of 45.4%. A total of 4544 protein-coding genes, 109 tRNAs and 31 rRNAs were annotated by the RAST. Five non-ribosomal peptide synthetase (NRPS) and polyketide synthetase (PKS) gene clusters were identified by the antiSMASH analysis. The pan-genome analysis of Z-P2 and 10 Shewanella putrefaciens revealed 9228 pan-gene clusters and 2681 core gene clusters, with Z-P2 having 618 unique gene clusters. Additionally, the gene cluster involved in putrebactin biosynthesis in Z-P2 was annotated, and the mechanism of putrebactin biosynthesis was analyzed. The putrebactin produced by Z-P2 was detected using UPLC-MS analysis, with an [M + H][+] molecular ion at m/z 373.21. 
These findings provide valuable support for further research on the genetic engineering of putrebactin biosynthetic genes of Z-P2 and their potential applications.}, } @article {pmid38136976, year = {2023}, author = {Serag, M and Plutino, M and Charles, P and Azulay, JP and Chaussenot, A and Paquis-Flucklinger, V and Ait-El-Mkadem Saadi, S and Rouzier, C}, title = {A Case Report of SYNE1 Deficiency-Mimicking Mitochondrial Disease and the Value of Pangenomic Investigations.}, journal = {Genes}, volume = {14}, number = {12}, pages = {}, doi = {10.3390/genes14122154}, pmid = {38136976}, issn = {2073-4425}, abstract = {Mitochondrial disorders are characterized by a huge clinical, biochemical, and genetic heterogeneity, which poses significant diagnostic challenges. Several studies report that more than 50% of patients with suspected mitochondrial disease could have a non-mitochondrial disorder. Thus, only the identification of the causative pathogenic variant can confirm the diagnosis. Herein, we describe the diagnostic journey of a family suspected of having a mitochondrial disorder who were referred to our Genetics Department. The proband presented with the association of cerebellar ataxia, COX-negative fibers on muscle histology, and mtDNA deletions. Whole exome sequencing (WES), supplemented by a high-resolution array, comparative genomic hybridization (array-CGH), allowed us to identify two pathogenic variants in the non-mitochondrial SYNE1 gene. The proband and her affected sister were found to be compound heterozygous for a known nonsense variant (c.13258C>T, p.(Arg4420Ter)), and a large intragenic deletion that was predicted to result in a loss of function. To our knowledge, this is the first report of a large intragenic deletion of SYNE1 in patients with cerebellar ataxia (ARCA1). This report highlights the interest in a pangenomic approach to identify the genetic basis in heterogeneous neuromuscular patients with the possible cause of mitochondrial disease. 
Moreover, even rare copy number variations should be considered in patients with a phenotype suggestive of SYNE1 deficiency.}, } @article {pmid38134602, year = {2023}, author = {Mumtaz, MN and Irfan, M and Siraj, S and Khan, A and Khan, H and Imran, M and Khan, IA and Khan, A}, title = {Whole-genome sequencing of extensively drug-resistant Salmonella enterica serovar Typhi clinical isolates from the Peshawar region of Pakistan.}, journal = {Journal of infection and public health}, volume = {17}, number = {2}, pages = {271--282}, doi = {10.1016/j.jiph.2023.12.002}, pmid = {38134602}, issn = {1876-035X}, abstract = {BACKGROUND: Typhoid fever, caused by Salmonella enterica serovar Typhi, is a significant public health concern due to the escalation of antimicrobial resistance (AMR); limited treatment options for extensively drug-resistant (XDR) S. Typhi strains pose a serious threat to disease management and control. This study aimed to investigate the genomic characteristics, epidemiology and AMR genes of XDR S. Typhi strains from typhoid fever patients in Pakistan.

METHODOLOGY: We assessed 200 patients with enteric fever symptoms, confirming 65 S. Typhi cases through culturing and biochemical tests. Subsequent antimicrobial susceptibility testing revealed 40 cases of extensively drug-resistant (XDR) and 25 cases of multi-drug resistance (MDR). Thirteen XDR strains were selected for whole-genome sequencing, to analyze their sequence type, phylogenetics, resistance genes, pathogenicity islands, and plasmid sequences using a variety of data analysis resources. Pangenome analysis was conducted for 140 XDR strains, including thirteen in-house and 127 strains reported from other regions of Pakistan, to assess their genetic diversity and functional annotation.

RESULTS: MLST analysis classified all isolates as sequence type 1 (ST-1) with 4.3.1.1.P1 genotype characterization. Prophage and Salmonella Pathogenicity Island (SPI) analysis identified intact prophages and eight SPIs involved in Salmonella's invasion and replication within host cells. Genome data analysis revealed numerous AMR genes including dfrA7, sul1, qnrS1, TEM-1, Cat1, and CTX-M-15, and SNPs associated with antibiotic resistance. IncY, IncQ1, pMAC, and pAbTS2 plasmids, conferring antimicrobial resistance, were detected in a few XDR S. Typhi strains. Phylogenetic analysis inferred a close epidemiological linkage among XDR strains from different regions of Pakistan. The pangenome was found to be closed among these strains, and functional annotation highlighted genes related to metabolism and pathogenesis.

CONCLUSION: This study revealed a uniform genotypic background among XDR S. Typhi strains in Pakistan, signifying persistent transmission of a single, highly antibiotic-resistant clone. The closed pan-genome observed underscores limited genetic diversity and highlights the importance of genomic surveillance for combating drug-resistant typhoid infections.}, } @article {pmid38128825, year = {2023}, author = {Wang, J and Peng, Y and Xu, Y and Li, Z and Zhan, G and Kang, Z and Zhao, J}, title = {Pan-genome analysis reveals a highly plastic genome and extensive secreted protein polymorphism in Puccinia striiformis f. sp. Tritici.}, journal = {Journal of genetics and genomics = Yi chuan xue bao}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgg.2023.12.004}, pmid = {38128825}, issn = {1673-8527}, } @article {pmid38126779, year = {2023}, author = {Ahmed, N and Joglekar, P and Deming, C and {NISC Comparative Sequencing Program} and Lemon, KP and Kong, HH and Segre, JA and Conlan, S}, title = {Genomic characterization of the C. tuberculostearicum species complex, a prominent member of the human skin microbiome.}, journal = {mSystems}, volume = {8}, number = {6}, pages = {e0063223}, doi = {10.1128/msystems.00632-23}, pmid = {38126779}, issn = {2379-5077}, support = {//HHS | NIH | National Human Genome Research Institute (NHGRI)/ ; //HHS | NIH | National Institute of Arthritis and Musculoskeletal and Skin Diseases (NIAMS)/ ; }, abstract = {Amplicon sequencing data combined with isolate whole genome sequencing have expanded our understanding of Corynebacterium on the skin. Healthy human skin is colonized by a diverse collection of Corynebacterium species, but Corynebacterium tuberculostearicum predominates on many skin sites. Our work supports the emerging idea that C. tuberculostearicum is a species complex encompassing several distinct species. 
We produced a collection of genomes that help define this complex, including a potentially new species we term Corynebacterium hallux based on a preference for sites on the feet, whole-genome average nucleotide identity, pangenomic analysis, and growth in skin-like media. This isolate collection and high-quality genome resource set the stage for developing engineered strains for both basic and translational clinical studies.}, } @article {pmid38125681, year = {2023}, author = {Charron, P and Gao, R and Chmara, J and Hoover, E and Nadin-Davis, S and Chauvin, D and Hazelwood, J and Makondo, K and Duceppe, MO and Kang, M}, title = {Influence of genomic variations on glanders serodiagnostic antigens using integrative genomic and transcriptomic approaches.}, journal = {Frontiers in veterinary science}, volume = {10}, number = {}, pages = {1217135}, pmid = {38125681}, issn = {2297-1769}, abstract = {Glanders is a highly contagious and life-threatening zoonotic disease caused by Burkholderia mallei (B. mallei). Without an effective vaccine or treatment, early diagnosis has been regarded as the most effective method to prevent glanders transmission. Currently, the diagnosis of glanders is heavily reliant on serological tests. However, given that markedly different host immune responses can be elicited by genetically different strains of the same bacterial species, infection by B. mallei, whose genome is unstable and plastic, may result in various immune responses. This variability can make the serodiagnosis of glanders challenging. Therefore, there is a need for a comprehensive understanding and assessment of how B. mallei genomic variations impact the appropriateness of specific target antigens for glanders serodiagnosis. In this study, we investigated how genomic variations in the B. mallei genome affect gene content (gene presence/absence) and expression, with a special focus on antigens used or potentially used in serodiagnosis. In all the genome sequences of B. 
mallei isolates available in NCBI's RefSeq database (accessed in July 2023) and in-house sequenced samples, extensive small and large variations were observed when compared to the type strain ATCC 23344. Further pan-genome analysis of those assemblies revealed variations of gene content among all available genomes of B. mallei. Specifically, differences in gene content ranging from 31 to 715 genes with an average of 334 gene presence-absence variations were found in strains with complete or chromosome-level genome assemblies, using the ATCC 23344 strain as a reference. The affected genes included some encoded proteins used as serodiagnostic antigens, which were lost due mainly to structural variations. Additionally, a transcriptomic analysis was performed using the type strain ATCC 23344 and strain Zagreb which has been widely utilized to produce glanders antigens. In total, 388 significant differentially expressed genes were identified between these two strains, including genes related to bacterial pathogenesis and virulence, some of which were associated with genomic variations, particularly structural variations. To our knowledge, this is the first comprehensive study to uncover the impacts of genetic variations of B. mallei on its gene content and expression. These differences would have significant impacts on host innate and adaptive immunity, including antibody production, during infection. This study provides novel insights into B. 
mallei genetic variants, knowledge which will help to improve glanders serodiagnosis.}, } @article {pmid38122983, year = {2023}, author = {Mondol, SM and Islam, MR and Rakhi, NN and Shakil, SK and Islam, I and Mustary, JF and Amiruzzaman, and Shahjalal, HM and Gomes, DJ and Rahaman, MM}, title = {Unveiling a High-Risk Epidemic Clone (ST 357) of 'Difficult to Treat Extensively Drug-Resistant' (DT-XDR) Pseudomonas aeruginosa from a burn patient in Bangladesh: A Resilient Beast Revealing Co-existence of Four Classes of Beta Lactamases.}, journal = {Journal of global antimicrobial resistance}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jgar.2023.11.014}, pmid = {38122983}, issn = {2213-7173}, abstract = {OBJECTIVES: Pseudomonas aeruginosa (P. aeruginosa) stands out as a key culprit in the colonization of burn wounds, instigating grave infections of heightened severity. In this study, we have performed comparative whole genome analysis of a difficult to treat extensively drug resistant (DT-XDR) P. aeruginosa isolated from a burn patient in order to elucidate genomic diversity, molecular patterns, mechanisms and genes responsible for conferring antimicrobial resistance and virulence.

METHOD: P. aeruginosa SHNIBPS206 was isolated from an infected burn wound of a critically injured burn patient. Whole genome sequencing was carried out and annotated with Prokka. Sequence type, serotype, antimicrobial resistance genes and mechanisms, virulence genes, metal resistance genes and CRISPR/Cas systems were investigated. Later, pangenome analysis was carried out to find out genomic diversity.

RESULT: P. aeruginosa SHNIBPS206 (MLST 357, Serotype O11) was resistant to 14 antibiotics including carbapenems and harboured all four classes of beta lactamase producing genes: Class A (blaPME-1, blaVEB-9), Class B (blaNDM-1), Class C (blaPDC-11) and Class D (blaOXA-846). Mutational analysis of Porin D gave valuable insights. Several efflux pump, virulence and metal resistance genes were also detected. Pangenome analysis revealed high genomic diversity among different strains of P. aeruginosa.

CONCLUSION: To our knowledge, this is the first report of an extensively drug resistant ST 357 P. aeruginosa from Bangladesh, which is an epidemic high-risk P. aeruginosa clone. Further research and in-depth comprehensive studies are required to investigate the prevalence of such high-risk clone of P. aeruginosa in Bangladesh.}, } @article {pmid38117845, year = {2023}, author = {Hollensteiner, J and Schneider, D and Poehlein, A and Brinkhoff, T and Daniel, R}, title = {Pan-genome analysis of six Paracoccus type strain genomes reveal lifestyle traits.}, journal = {PloS one}, volume = {18}, number = {12}, pages = {e0287947}, doi = {10.1371/journal.pone.0287947}, pmid = {38117845}, issn = {1932-6203}, abstract = {The genus Paracoccus capable of inhabiting a variety of different ecological niches both, marine and terrestrial, is globally distributed. In addition, Paracoccus is taxonomically, metabolically and regarding lifestyle highly diverse. Until now, little is known on how Paracoccus can adapt to such a range of different ecological niches and lifestyles. In the present study, the genus Paracoccus was phylogenomically analyzed (n = 160) and revisited, allowing species level classification of 16 so far unclassified Paracoccus sp. strains and detection of five misclassifications. Moreover, we performed pan-genome analysis of Paracoccus-type strains, isolated from a variety of ecological niches, including different soils, tidal flat sediment, host association such as the bluespotted cornetfish, Bugula plumosa, and the reef-building coral Stylophora pistillata to elucidate either i) the importance of lifestyle and adaptation potential, and ii) the role of the genomic equipment and niche adaptation potential. Six complete genomes were de novo hybrid assembled using a combination of short and long-read technologies. These Paracoccus genomes increase the number of completely closed high-quality genomes of type strains from 15 to 21. 
Pan-genome analysis revealed an open pan-genome composed of 13,819 genes with a minimal chromosomal core (8.84%) highlighting the genomic adaptation potential and the huge impact of extra-chromosomal elements. All genomes are shaped by the acquisition of various mobile genetic elements including genomic islands, prophages, transposases, and insertion sequences emphasizing their genomic plasticity. In terms of lifestyle, each mobile genetic element should be evaluated separately with respect to the ecological context. Free-living genomes, in contrast to host-associated, tend to comprise (1) larger genomes, or the highest number of extra-chromosomal elements, (2) higher number of genomic islands and insertion sequence elements, and (3) a lower number of intact prophage regions. Regarding lifestyle adaptations, free-living genomes share genes linked to genetic exchange via T4SS, especially relevant for Paracoccus, known for their numerous extrachromosomal elements, enabling adaptation to dynamic environments. Conversely, host-associated genomes feature diverse genes involved in molecule transport, cell wall modification, attachment, stress protection, DNA repair, carbon, and nitrogen metabolism. Due to the vast number of adaptive genes, Paracoccus can quickly adapt to changing environmental conditions.}, } @article {pmid38113358, year = {2023}, author = {Bourdin, A and Toutée, A and Fardeau, C}, title = {Intravenous Immunoglobulins for Bilateral Retinochoroiditis in Rhinovirus Infection: A Case Report.}, journal = {Ophthalmic surgery, lasers \& imaging retina}, volume = {54}, number = {12}, pages = {720--722}, doi = {10.3928/23258160-20231019-02}, pmid = {38113358}, issn = {2325-8179}, abstract = {A 43-year-old woman presented bilateral anterior granulomatous uveitis associated with bilateral disc edema and bilateral peripheral retinochoroidal lesions. 
Intravenous corticosteroids after negative investigations for infectious causes did not prevent spreading of the lesions and retinal atrophy. A diagnostic vitrectomy with vitreous analysis, including pan-genomic, next-generation sequencing showed a positive result for rhinovirus HRV B91, and the cytological analysis was suggestive of infection. Intravenous immunoglobulins associated with pegylated interferon-alpha strongly slowed the progression of the lesions and led to scarred and atrophic aspect in both eyes after 6 months. [Ophthalmic Surg Lasers Imaging Retina 2023;54:720-722.].}, } @article {pmid38112751, year = {2023}, author = {Gould, AL and Henderson, JB}, title = {Comparative genomics of symbiotic Photobacterium using highly contiguous genome assemblies from long read sequences.}, journal = {Microbial genomics}, volume = {9}, number = {12}, pages = {}, doi = {10.1099/mgen.0.001161}, pmid = {38112751}, issn = {2057-5858}, abstract = {This study presents the assembly and comparative genomic analysis of luminous Photobacterium strains isolated from the light organs of 12 fish species using Oxford Nanopore Technologies (ONT) sequencing. The majority of assemblies achieved chromosome-level continuity, consisting of one large (>3 Mbp) and one small (~1.5 Mbp) contig, with near complete BUSCO scores along with varying plasmid sequences. Leveraging this dataset, this study significantly expanded the available genomes for P. leiognathi and its subspecies P. 'mandapamensis', enabling a comparative genomic analysis between the two lineages. An analysis of the large and small chromosomes unveiled distinct patterns of core and accessory genes, with a larger fraction of the core genes residing on the large chromosome, supporting the hypothesis of secondary chromosome evolution from megaplasmids in Vibrionaceae. In addition, we discovered a proposed new species, Photobacterium acropomis sp. 
nov., isolated from an acropomatid host, with an average nucleotide identity (ANI) of 93 % compared to the P. leiognathi and P. 'mandapamensis' strains. A comparison of the P. leiognathi and P. 'mandapamensis' lineages revealed minimal differences in gene content, yet highlighted the former's larger genome size and potential for horizontal gene transfer. An investigation of the lux-rib operon, responsible for light production, indicated congruence between the presence of luxF and host family, challenging its role in differentiating P. 'mandapamensis' from P. leiognathi. Further insights were derived from the identification of metabolic differences, such as the presence of the NADH:quinone oxidoreductase respiratory complex I in P. leiognathi as well as variations in the type II secretion system (T2S) genes between the lineages, potentially impacting protein secretion and symbiosis. In summary, this study advances our understanding of Photobacterium genome evolution, highlighting subtle differences between closely related lineages, specifically P. leiognathi and P. 'mandapamensis'. 
These findings highlight the benefit of long read sequencing for bacterial genome assembly and pangenome analysis and provide a foundation for exploring early bacterial speciation processes of these facultative light organ symbionts.}, } @article {pmid38111050, year = {2023}, author = {Cochetel, N and Minio, A and Guarracino, A and Garcia, JF and Figueroa-Balderas, R and Massonnet, M and Kasuga, T and Londo, JP and Garrison, E and Gaut, BS and Cantu, D}, title = {A super-pangenome of the North American wild grape species.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {290}, pmid = {38111050}, issn = {1474-760X}, support = {1741627//National Science Foundation/ ; }, abstract = {BACKGROUND: Capturing the genetic diversity of wild relatives is crucial for improving crops because wild species are valuable sources of agronomic traits that are essential to enhance the sustainability and adaptability of domesticated cultivars. Genetic diversity across a genus can be captured in super-pangenomes, which provide a framework for interpreting genomic variations.

RESULTS: Here we report the sequencing, assembly, and annotation of nine wild North American grape genomes, which are phased and scaffolded at chromosome scale. We generate a reference-unbiased super-pangenome using pairwise whole-genome alignment methods, revealing the extent of the genomic diversity among wild grape species from sequence to gene level. The pangenome graph captures genomic variation between haplotypes within a species and across the different species, and it accurately assesses the similarity of hybrids to their parents. The species selected to build the pangenome are a great representation of the genus, as illustrated by capturing known allelic variants in the sex-determining region and for Pierce's disease resistance loci. Using pangenome-wide association analysis, we demonstrate the utility of the super-pangenome by effectively mapping short reads from genus-wide samples and identifying loci associated with salt tolerance in natural populations of grapes.

CONCLUSIONS: This study highlights how a reference-unbiased super-pangenome can reveal the genetic basis of adaptive traits from wild relatives and accelerate crop breeding research.}, } @article {pmid38110716, year = {2023}, author = {Vogan, K}, title = {Refining the apple pan-genome.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, doi = {10.1038/s41588-023-01629-y}, pmid = {38110716}, issn = {1546-1718}, } @article {pmid38107860, year = {2023}, author = {Jiang, ZM and Mou, T and Sun, Y and Su, J and Yu, LY and Zhang, YQ}, title = {Environmental distribution and genomic characteristics of Solirubrobacter, with proposal of two novel species.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1267771}, pmid = {38107860}, issn = {1664-302X}, abstract = {Solirubrobacter spp. were abundant in soil samples collected from deserts and other areas with high UV radiation. In addition, a novel Solirubrobacter species, with strain CPCC 204708[T] as the type, was isolated and identified from sandy soil sample collected from the Badain Jaran Desert of the Inner Mongolia autonomous region. Strain CPCC 204708[T] was Gram-stain positive, rod-shaped, non-motile, non-spore-forming, and grew optimally at 28-30°C, pH 7.0-8.0, and in the absence of NaCl. Analysis of the 16S rRNA gene sequence of strain CPCC 204708[T] showed its identity within the genus Solirubrobacter, with highest nucleotide similarities (97.4-98.2%) to other named Solirubrobacter species. Phylogenetic and genomic analyses indicated that the strain was most closely related to Solirubrobacter phytolaccae KCTC 29190[T], while represented a distinct species, as confirmed from physiological properties and comparison. The name Solirubrobacter deserti sp. nov. was consequently proposed, with CPCC 204708[T] (= DSM 105495[T] = NBRC 112942[T]) as the type strain. Genomic analyses of the Solirubrobacter spp. also suggested that Solirubrobacter sp. 
URHD0082 represents a novel species, for which the name Candidatus "Solirubrobacter pratensis" sp. nov. was proposed. Genomic analysis of CPCC 204708[T] revealed the presence of genes related to its adaptation to the harsh environments of deserts and may also harbor genes functional in plant-microbe interactions. Pan-genomic analysis of available Solirubrobacter spp. confirmed the presence of many of the above genes as core components of Solirubrobacter genomes and suggests they may possess beneficial potential for their associate plant and may be important resources for bioactive compounds.}, } @article {pmid38105952, year = {2023}, author = {Newcomer, EP and Fishbein, SRS and Zhang, K and Hink, T and Reske, KA and Cass, C and Iqbal, ZH and Struttmann, EL and Dubberke, ER and Dantas, G}, title = {Genomic surveillance of Clostridioides difficile transmission and virulence in a healthcare setting.}, journal = {medRxiv : the preprint server for health sciences}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.09.26.23295023}, pmid = {38105952}, abstract = {Clostridioides difficile infection (CDI) is a major cause of healthcare-associated diarrhea, despite the widespread implementation of contact precautions for patients with CDI. Here, we investigate strain contamination in a hospital setting and genomic determinants of disease outcomes. Across two wards over six months, we selectively cultured C. difficile from patients (n=384) and their environments. Whole-genome sequencing (WGS) of 146 isolates revealed that most C. difficile isolates were from clade 1 (131/146, 89.7%), while only one isolate of the hypervirulent ST1 was recovered. Of culture-positive admissions (n=79), 19 (24%) of patients were colonized with toxigenic C. difficile on admission to the hospital. We defined 25 strain networks at ≤ 2 core gene SNPs; 2 of these networks contain strains from different patients. Strain networks were temporally linked (p<0.0001). 
To understand genomic correlates of disease, we conducted WGS on an additional cohort of C. difficile (n=102 isolates) from the same hospital and confirmed that clade 1 isolates are responsible for most CDI cases. We found that while toxigenic C. difficile isolates are associated with the presence of cdtR , nontoxigenic isolates have an increased abundance of prophages. Our pangenomic analysis of clade 1 isolates suggests that while toxin genes (tcdABER and cdtR) were associated with CDI symptoms, they are dispensable for patient colonization. These data indicate toxigenic and nontoxigenic C. difficile contamination persists in a hospital setting and highlight further investigation into how accessory genomic repertoires contribute to C. difficile colonization and disease.}, } @article {pmid38103051, year = {2023}, author = {Kim, YH and Park, J and Chung, HS}, title = {Genetic characterization of tetracycline-resistant Staphylococcus aureus with reduced vancomycin susceptibility using whole-genome sequencing.}, journal = {Archives of microbiology}, volume = {206}, number = {1}, pages = {24}, pmid = {38103051}, issn = {1432-072X}, support = {2020R1C1C1013823//National Research Foundation of Korea (NRF) grant funded by the Korea government (MSIT)/ ; }, abstract = {This study aimed to analyze the genetic characteristics of Staphylococcus aureus with reduced vancomycin susceptibility (RVS-SA). Whole-genome sequencing was performed on 27 RVS-SA clinical isolates, and comparative genomic analysis was performed using S. aureus reference strains. Pan-genome orthologous groups (POGs) were identified that were present in RVS-SA but absent in the reference strains, but further analysis showed that the presence of these POGs was influenced by tetracycline resistance rather than vancomycin resistance. Therefore, we restricted our analysis to tetracycline-resistant (tetR) RVS-SA and tetR vancomycin-susceptible S. aureus (VSSA). 
Phylogenomic analysis showed them to be closely related, and further analysis revealed the presence of an uncharacterized protein SAB0394 and the absence of lytA in tetR RVS-SA, which are involved in cell wall thickening. In summary, using whole-genome sequencing we identified gain or loss of genes in tetR RVS-SA strains. These findings provide insights into the investigation of mechanisms associated with reduced vancomycin susceptibility and have the potential to contribute to the development of molecular biomarkers for the rapid and efficient detection of RVS-SA.}, } @article {pmid38084888, year = {2023}, author = {Do, VH and Nguyen, SH and Le, DQ and Nguyen, TT and Nguyen, CH and Ho, TH and Vo, NS and Nguyen, T and Nguyen, HA and Cao, MD}, title = {Pasa: leveraging population pangenome graph to scaffold prokaryote genome assemblies.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad1170}, pmid = {38084888}, issn = {1362-4962}, support = {VINIF.2019.DA11//VINIF/ ; }, abstract = {Whole genome sequencing has increasingly become the essential method for studying the genetic mechanisms of antimicrobial resistance and for surveillance of drug-resistant bacterial pathogens. The majority of bacterial genomes sequenced to date have been sequenced with Illumina sequencing technology, owing to its high-throughput, excellent sequence accuracy, and low cost. However, because of the short-read nature of the technology, these assemblies are fragmented into large numbers of contigs, hindering the obtaining of full information of the genome. We develop Pasa, a graph-based algorithm that utilizes the pangenome graph and the assembly graph information to improve scaffolding quality. By leveraging the population information of the bacteria species, Pasa is able to utilize the linkage information of the gene families of the species to resolve the contig graph of the assembly. 
We show that our method outperforms the current state of the arts in terms of accuracy, and at the same time, is computationally efficient to be applied to a large number of existing draft assemblies.}, } @article {pmid38076784, year = {2023}, author = {Vaddadi, NSK and Mun, T and Langmead, B}, title = {Minimizing Reference Bias with an Impute-First Approach.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.11.30.568362}, pmid = {38076784}, abstract = {Pangenome indexes reduce reference bias in sequencing data analysis. However, a greater reduction in bias can be achieved using a personalized reference, e.g. a diploid human reference constructed to match a donor individual's alleles. We present a novel impute-first alignment framework that combines elements of genotype imputation and pangenome alignment. It begins by genotyping the individual from a sub-sample of the input reads. It next uses a reference panel and efficient imputation algorithm to impute a personalized diploid reference. Finally, it indexes the personalized reference and applies a read aligner, which could be a linear or graph aligner, to align the full read set to the personalized reference. This frame-work has higher variant-calling recall (99.54% vs. 99.37%), precision (99.36% vs. 99.18%), and F1 (99.45% vs. 99.28%) compared to a graph-based pangenome. 
The personalized reference is also smaller and faster to query compared to a pangenome index, making it an overall advantageous choice for whole-genome DNA sequencing experiments.}, } @article {pmid38075907, year = {2023}, author = {Lan, Y and Liu, M and Song, Y and Cao, Y and Li, F and Luo, D and Qiao, D}, title = {Distribution, characterization, and evolution of heavy metal resistance genes and Tn7-like associated heavy metal resistance Gene Island of Burkholderia.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1252127}, pmid = {38075907}, issn = {1664-302X}, abstract = {INTRODUCTION: Burkholderia is a rod-shaped aerobic Gram-negative bacteria with considerable genetic and metabolic diversity, which can be used for bioremediation and production applications, and has great biotechnology potential. However, there are few studies on the heavy metal resistance of the Burkholderia genus.

METHODS: In this paper, the distribution, characteristics and evolution of heavy metal resistance genes in Burkholderia and the gene island of Tn7-like transposable element associated with heavy metal resistance genes in Burkholderia were studied by comparative genomic method based on the characteristics of heavy metal resistance.

RESULTS AND DISCUSSION: The classification status of some species of the Burkholderia genus was improved, and it was found that Burkholderia dabaoshanensis and Burkholderia novacaledonica do not belong to the Burkholderia genus. Secondly, comparative genomics studies and pan-genome analysis found that the core genome of Burkholderia has a larger proportion of heavy metal resistance genes and a greater variety of heavy metal resistance genes than the subsidiary genome and strain specific genes. Heavy metal resistance genes are mostly distributed in the genome in the form of various gene clusters (for example, mer clusters, ars clusters, czc/cusABC clusters). At the same time, transposase, recombinase, integrase and other genes were found upstream and downstream of heavy metal gene clusters, indicating that heavy metal resistance genes may be obtained through horizontal transfer. The analysis of natural selection pressure of heavy metal resistance genes showed that heavy metal resistance genes experienced strong purification selection under purification selection pressure in the genome. The Tn7 like transposable element of Burkholderia was associated with the heavy metal resistance gene island, and there were a large number of Tn7 transposable element insertion events in genomes.
At the same time, BGI metal gene islands related to heavy metal resistance genes of Tn7 like transposable element were found, and these gene islands were only distributed in Burkholderia cepacia, Burkholderia polyvora, and Burkholderia contaminant.}, } @article {pmid38075893, year = {2023}, author = {You, M and Zhao, Q and Liu, Y and Zhang, W and Shen, Z and Ren, Z and Xu, C}, title = {Insights into lignocellulose degradation: comparative genomics of anaerobic and cellulolytic Ruminiclostridium-type species.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1288286}, pmid = {38075893}, issn = {1664-302X}, abstract = {Mesophilic, anaerobic, and cellulolytic Ruminiclostridium-type bacterial species can secrete an extracellular, multi-enzyme machinery cellulosome, which efficiently degrades cellulose. In this study, we first reported the complete genome of Ruminiclostridium papyrosolvens DSM2782, a single circular 5,027,861-bp chromosome with 37.1% G + C content, and compared it with other Ruminiclostridium-type species. Pan-genome analysis showed that Ruminiclostridium-type species share a large number of core genes to conserve basic functions, although they have a high level of intraspecific genetic diversity. Especially, KEGG mapping revealed that Ruminiclostridium-type species mainly use ABC transporters regulated by two-component systems (TCSs) to absorb extracellular sugars but not phosphotransferase systems (PTSs) that are employed by solventogenic clostridia, such as Clostridium acetobutylicum. Furthermore, we performed comparative analyses of the species-specific repertoire of CAZymes for each of the Ruminiclostridium-type species. The high similarity of their cohesins suggests a common ancestor and potential cross-species recognition. 
Additionally, both differences between the C-terminal cohesins and other cohesins of scaffoldins and between the dockerins linking with cellulases and other catalytic domains indicate a preference for the location of cellulosomal catalytic subunits at scaffoldins. The information gained in this study may be utilized directly or developed further by genetic engineering and optimizing enzyme systems or cell factories for enhanced biotechnological biomass deconstruction and biofuel production.}, } @article {pmid38075891, year = {2023}, author = {Zhu, X and Lu, Q and Li, Y and Long, Q and Zhang, X and Long, X and Cao, D}, title = {Contraction and expansion dynamics: deciphering genomic underpinnings of growth rate and pathogenicity in Mycobacterium.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1292897}, pmid = {38075891}, issn = {1664-302X}, abstract = {BACKGROUND: Mycobacterium bacteria, encompassing both slow growth (SGM) and rapid growth mycobacteria (RGM), along with true pathogenic (TP), opportunistic pathogenic (OP), and non-pathogenic (NP) types, exhibit diverse phenotypes. Yet, the genetic underpinnings of these variations remain elusive.

METHODS: Here, we conducted a comprehensive comparative genomics study involving 53 Mycobacterium species to unveil the genomic drivers behind growth rate and pathogenicity disparities.

RESULTS: Our core/pan-genome analysis highlighted 1,307 shared gene families, revealing an open pan-genome structure. A phylogenetic tree highlighted clear boundaries between SGM and RGM, as well as TP and other species. Gene family contraction emerged as the primary alteration associated with growth and pathogenicity transitions. Specifically, ABC transporters for amino acids and inorganic ions, along with quorum sensing genes, exhibited significant contractions in SGM species, potentially influencing their distinct traits. Conversely, TP strains displayed contraction in lipid and secondary metabolite biosynthesis and metabolism-related genes. Across the 53 species, we identified 26 core and 64 accessory virulence factors. Remarkably, TP and OP strains stood out for their expanded mycobactin biosynthesis and type VII secretion system gene families, pivotal for their pathogenicity.

CONCLUSION: Our findings underscore the importance of gene family contraction in nucleic acids, ions, and substance metabolism for host adaptation, while emphasizing the significance of virulence gene family expansion, including type VII secretion systems and mycobactin biosynthesis, in driving mycobacterial pathogenicity.}, } @article {pmid38075871, year = {2023}, author = {Pham, A and Volmer, JG and Chambers, DC and Smith, DJ and Reid, DW and Burr, L and Wells, TJ}, title = {Genomic analyses of Burkholderia respiratory isolates indicates two evolutionarily distinct B. anthina clades.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1274280}, pmid = {38075871}, issn = {1664-302X}, abstract = {INTRODUCTION: The Burkholderia cepacia complex (BCC) encompasses a group of at least 22 genetically distinct Gram-negative bacterial species ubiquitous in nature. Recognised as a group of genetically and phenotypically flexible species, the BCC inhabits diverse ecological niches causing both plant and human diseases. Comparative genomic analysis provides an in-depth understanding into the population biology, phylogenetic relationship, and genomic architecture of species.

METHODS: Here, we genomically characterise Burkholderia anthina isolated from patients with chronic lung infections, an understudied pathogen within the Burkholderia cepacia complex.

RESULTS: We demonstrate that B. anthina is polyphyletic and constitutes two distinct evolutionary lineages. Core- and pan-genome analyses demonstrated substantial metabolic diversity, with B. anthina Clade I enriched in genes associated with microbial metabolism in diverse environments, including degradation of aromatic compounds and metabolism of xenobiotics, while B. anthina Clade II demonstrated an enhanced capability for siderophore biosynthesis.

DISCUSSION: Based on our phylogenetic and comparative genomic analyses, we suggest stratifying B. anthina to recognise a distinct species harbouring increased potential for iron metabolism via siderophore synthesis, for which we propose the name Burkholderia anthinoferum (sp. nov.).}, } @article {pmid38071270, year = {2023}, author = {Minich, JJ and Moore, ML and Allsing, NA and Aylward, A and Murray, ER and Tran, L and Michael, TP}, title = {Generating high-quality plant and fish reference genomes from field-collected specimens by optimizing preservation.}, journal = {Communications biology}, volume = {6}, number = {1}, pages = {1246}, pmid = {38071270}, issn = {2399-3642}, support = {INV-040541/GATES/Bill & Melinda Gates Foundation/United States ; }, abstract = {Sample preservation often impedes efforts to generate high-quality reference genomes or pangenomes for Earth's more than 2 million plant and animal species due to nucleotide degradation. Here we compare the impacts of storage methods including solution type, temperature, and time on DNA quality and Oxford Nanopore long-read sequencing quality in 9 fish and 4 plant species. We show 95% ethanol largely protects against degradation for fish blood (22 °C, ≤6 weeks) and plant tissue (4 °C, ≤3 weeks). From this furthest storage timepoint, we assemble high-quality reference genomes of 3 fish and 2 plant species with contiguity (contig N50) and completeness (BUSCO) that achieve the Vertebrate Genome Project benchmarking standards. For epigenetic applications, we also report methylation frequency compared to liquid nitrogen control. 
The results presented here remove the necessity for cryogenic storage in many long read applications and provide a framework for future studies focused on sampling in remote locations, which may represent a large portion of the future sequencing of novel organisms.}, } @article {pmid38071267, year = {2023}, author = {Norman, M and Chen, C and Miah, H and Patpour, M and Sørensen, C and Hovmøller, M and Forrest, K and Kumar, S and Prasad, P and Gangwar, OP and Bhardwaj, S and Bariana, H and Periyannan, S and Bansal, U}, title = {Sr65: a widely effective gene for stem rust resistance in wheat.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {137}, number = {1}, pages = {1}, pmid = {38071267}, issn = {1432-2242}, abstract = {Sr65 in chromosome 1A of Indian wheat landrace Hango-2 is a potentially useful all-stage resistance gene that currently protects wheat from stem rust in Australia, India, Africa and Europe. Stem rust, caused by Puccinia graminis f. sp. tritici (Pgt), threatened global wheat production with the appearance of widely virulent races that included TTKSK and TTRTF. Indian landrace Hango-2 showed resistance to Pgt races in India and Australia. Screening of a Hango-2/Avocet 'S' (AvS) recombinant inbred line population identified two stem rust resistance genes, a novel gene (temporarily named as SrH2) from Hango-2 and Sr26 from AvS. A mapping population segregating for SrH2 alone was developed from two recombinant lines. SrH2 was mapped on the short arm of chromosome 1A, where it was flanked by KASP markers KASP_7944 (proximal) and KASP_12147 (distal). SrH2 was delimited to an interval of 1.8-2.3 Mb on chromosome arm 1AS. The failure to detect candidate genes through MutRenSeq and comparative genomic analysis with the pan-genome dataset indicated the necessity to generate a Hango-2 specific assembly for detecting the gene sequence linked with SrH2 resistance. 
MutRenSeq however enabled identification of SrH2-linked KASP marker sunCS_265. Markers KASP_12147 and sunCS_265 showed 92% and 85% polymorphism among an Australian cereal cultivar diversity panel and can be used for marker-assisted selection of SrH2 in breeding programs. The effectiveness of SrH2 against Pgt races from Europe, Africa, India, and Australia makes it a valuable resource for breeding stem rust-resistant wheat cultivars. Since no wheat-derived gene was previously located in chromosome arm 1AS, SrH2 represents a new locus and named as SR65.}, } @article {pmid38070563, year = {2023}, author = {Lau, NS and Furusawa, G}, title = {Polysaccharide degradation in Cellvibrionaceae: Genomic insights of the novel chitin-degrading marine bacterium, strain KSP-S5-2, and its chitinolytic activity.}, journal = {The Science of the total environment}, volume = {}, number = {}, pages = {169134}, doi = {10.1016/j.scitotenv.2023.169134}, pmid = {38070563}, issn = {1879-1026}, abstract = {In this study, we present the genome characterization of a novel chitin-degrading strain, KSP-S5-2, and comparative genomics of 33 strains of Cellvibrionaceae. Strain KSP-S5-2 was isolated from mangrove sediment collected in Balik Pulau, Penang, Malaysia, and its 16S rRNA gene sequence showed the highest similarity (95.09%) to Teredinibacter franksiae. Genome-wide analyses including 16S rRNA gene sequence similarity, average nucleotide identity, digital DNA-DNA hybridization, and phylogenomics, suggested that KSP-S5-2 represents a novel species in the family Cellvibrionaceae. The Cellvibrionaceae pan-genome exhibited high genomic variability, with only 1.7 % representing the core genome, while the flexible genome showed a notable enrichment of genes related to carbohydrate metabolism and transport pathway. This observation sheds light on the genetic plasticity of the Cellvibrionaceae family and the gene pools that form the basis for the evolution of polysaccharide-degrading capabilities. 
Comparative analysis of the carbohydrate-active enzymes across Cellvibrionaceae strains revealed that the chitinolytic system is not universally present within the family, as only 18 of the 33 genomes encoded chitinases. Strain KSP-S5-2 displayed an expanded repertoire of chitinolytic enzymes (25 GH18, two GH19 chitinases, and five GH20 β-N-acetylhexosaminidases) but lacked genes for agar, xylan, and pectin degradation, indicating specialized enzymatic machinery focused primarily on chitin degradation. Further, the strain degraded 90 % of chitin after 10 days of incubation. In summary, our findings provided insights into strain KSP-S5-2's genomic potential, the genetics of its chitinolytic system, genomic diversity within the Cellvibrionaceae family in terms of polysaccharide degradation, and its application for chitin degradation.}, } @article {pmid38070037, year = {2023}, author = {Aziz, K and Gilbert, JA and Zaidi, AH}, title = {Genomic and Phenotypic Insight into the Probiotic Potential of Lactic Acid Bacterial spp. Associated with the Human Gut Mucosa.}, journal = {Probiotics and antimicrobial proteins}, volume = {}, number = {}, pages = {}, pmid = {38070037}, issn = {1867-1314}, support = {SIG # S10 OD026929/NH/NIH HHS/United States ; }, abstract = {Commensal microbiome-based health support is gaining respect in the medical community and new human gut-associated Lactic Acid Bacteria (LAB) strains must be evaluated for their probiotic potential. Here we characterized the phenotype and genomes of human ileocecal mucosa-associated LAB strains using metagenomic sequencing and in vitro testing. The strains characterized belonged to the genus Enterococcus (Enterococcus lactis NPL1366, NPL1371, and Enterococcus mundtii NPL1379) and Lactobacillus (Lactobacillus paragasseri, NPL1369, NPL1370, and Lactiplantibacillus plantarum NPL1378). Genome annotation suggested bacterial adaptation to both human physiological and industrial manufacturing-related stressors. 
Genes for histidine kinases in enterococci and Na+/K+ antiporters and F0F1 ATP synthases in Lactobacillus strains may support their tolerance to acid seen in vitro. The bile salt hydrolase (BSH) gene in Lp. plantarum and L. paragasseri may help explain their reported bile salt deconjugation and cholesterol-lowering behavior. Thioredoxin is the principal antioxidant system, and several oxidases and general stress-related proteins are found in lactobacilli, most notably in L. plantarum NPL1378. Multiple adhesion and biofilm-related genes were predicted in the LAB genomes. Adhesion and biofilm-related genes figured prominently in the genomes of enterococcal strains, especially E. lactis, corresponding to its biofilm formation capacity in vitro. Bacteriocin and secondary metabolite biosynthetic gene clusters in the sequenced genomes of E. lactis NPL1366 and Lp. plantarum NPL1378 may explain their in vitro pathogenic antagonism. Moreover, folate producing Lp. plantarum strain holds potential to be used in therapeutics or biofortification of food. All the strains were deemed safe through in vitro and in silico analysis. This basic genetic and phenotypic information supports their contention as probiotic adjuncts to conventional medical therapy.}, } @article {pmid38070010, year = {2023}, author = {Gómez-Sánchez, I and Castelán-Sánchez, HG and Martínez-Castilla, LP and Hurtado-Ramírez, JM and López-Leal, G}, title = {Genetic insights into the microevolutionary dynamics and early introductions of human monkeypox virus in Mexico.}, journal = {Archives of virology}, volume = {169}, number = {1}, pages = {2}, pmid = {38070010}, issn = {1432-8798}, abstract = {The recent global outbreak of mpox, caused by monkeypox virus (MPV) emerged in Europe in 2022 and rapidly spread to over 40 countries. The Americas are currently facing the highest impact, reporting over 50,000 cases by early 2023.
In this study, we analyzed 880 MPV isolates worldwide to gain insights into the evolutionary patterns and initial introduction events of the virus in Mexico. We found that MPV entered Mexico on multiple occasions, from the United Kingdom, Portugal, and Canada, and subsequently spread locally in different regions of Mexico. Additionally, we show that MPV has an open pangenome, highlighting the role of gene turnover in shaping its genomic diversity, rather than single-nucleotide polymorphisms (SNPs), which do not contribute significantly to genome diversity. Although the genome contains multiple SNPs in coding regions, these remain under purifying selection, suggesting their evolutionary conservation. One notable exception is amino acid position 63 of the protein encoded by the Cop-A4L gene, which is intricately related to viral maturity, which we found to be under strong positive selection. Ancestral state reconstruction indicated that the ancestral state at position 63 corresponds to the amino acid valine, which is present only in isolates of clade I. However, the isolates from the current outbreak contained threonine at position 63. Our findings contribute new information about the evolution of monkeypox virus.}, } @article {pmid38069258, year = {2023}, author = {Lyu, K and Xiao, J and Lyu, S and Liu, R}, title = {Comparative Analysis of Transposable Elements in Strawberry Genomes of Different Ploidy Levels.}, journal = {International journal of molecular sciences}, volume = {24}, number = {23}, pages = {}, doi = {10.3390/ijms242316935}, pmid = {38069258}, issn = {1422-0067}, abstract = {Transposable elements (TEs) make up a large portion of plant genomes and play a vital role in genome structure, function, and evolution. Cultivated strawberry (Fragaria x ananassa) is one of the most important fruit crops, and its octoploid genome was formed through several rounds of genome duplications from diploid ancestors. 
Here, we built a pan-genome TE library for the Fragaria genus using ten published strawberry genomes at different ploidy levels, including seven diploids, one tetraploid, and two octoploids, and performed comparative analysis of TE content in these genomes. The TEs comprise 51.83% (F. viridis) to 60.07% (F. nilgerrensis) of the genomes. Long terminal repeat retrotransposons (LTR-RTs) are the predominant TE type in the Fragaria genomes (20.16% to 34.94%), particularly in F. iinumae (34.94%). Estimating TE content and LTR-RT insertion times revealed that species-specific TEs have shaped each strawberry genome. Additionally, the copy number of different LTR-RT families inserted in the last one million years reflects the genetic distance between Fragaria species. Comparing cultivated strawberry subgenomes to extant diploid ancestors showed that F. vesca and F. iinumae are likely the diploid ancestors of the cultivated strawberry, but not F. viridis. These findings provide new insights into the TE variations in the strawberry genomes and their roles in strawberry genome evolution.}, } @article {pmid38069099, year = {2023}, author = {Shemesh-Mayer, E and Faigenboim, A and Sherman, A and Gao, S and Zeng, Z and Liu, T and Kamenetsky-Goldstein, R}, title = {Deprivation of Sexual Reproduction during Garlic Domestication and Crop Evolution.}, journal = {International journal of molecular sciences}, volume = {24}, number = {23}, pages = {}, doi = {10.3390/ijms242316777}, pmid = {38069099}, issn = {1422-0067}, abstract = {Garlic, originating in the mountains of Central Asia, has undergone domestication and subsequent widespread introduction to diverse regions. Human selection for adaptation to various climates has resulted in the development of numerous garlic varieties, each characterized by specific morphological and physiological traits. However, this process has led to a loss of fertility and seed production in garlic crops. 
In this study, we conducted morpho-physiological and transcriptome analyses, along with whole-genome resequencing of 41 garlic accessions from different regions, in order to assess the variations in reproductive traits among garlic populations. Our findings indicate that the evolution of garlic crops was associated with mutations in genes related to vernalization and the circadian clock. The decline in sexual reproduction is not solely attributed to a few mutations in specific genes, but is correlated with extensive alterations in the genetic regulation of the annual cycle, stress adaptations, and environmental requirements. The regulation of flowering ability, stress response, and metabolism occurs at both the genetic and transcriptional levels. We conclude that the migration and evolution of garlic crops involve substantial and diverse changes across the entire genome landscape. The construction of a garlic pan-genome, encompassing genetic diversity from various garlic populations, will provide further insights for research into and the improvement of garlic crops.}, } @article {pmid38062402, year = {2023}, author = {Liu, Q and Ye, L and Li, M and Wang, Z and Xiong, G and Ye, Y and Tu, T and Schwarzacher, T and Heslop-Harrison, JSP}, title = {Genome-wide expansion and reorganization during grass evolution: from 30 Mb chromosomes in rice and Brachypodium to 550 Mb in Avena.}, journal = {BMC plant biology}, volume = {23}, number = {1}, pages = {627}, pmid = {38062402}, issn = {1471-2229}, support = {32070359, 32370402//National Natural Science Foundation of China/ ; 2021A1515012410//Basic and Applied Basic Research Foundation of Guangdong Province/ ; KCJH-80107-2023-148//Sciences Innovative Training Programs for Undergraduates of Chinese Academy of Sciences/ ; GDZZDC20228704//Guangdong Provincial Special Fund for Natural Resource Affairs on Ecology and Forestry Construction/ ; Y861041001//Overseas Distinguished Scholar Project of South China Botanical Garden, 
Chinese Academy of Sciences/ ; BB/P02307X/1//Global Challenges Research Foundation for Global Agricultural and Food Systems Research/ ; }, abstract = {BACKGROUND: The BOP (Bambusoideae, Oryzoideae, and Pooideae) clade of the Poaceae has a common ancestor, with similarities to the genomes of rice, Oryza sativa (2n = 24; genome size 389 Mb) and Brachypodium, Brachypodium distachyon (2n = 10; 271 Mb). We exploit chromosome-scale genome assemblies to show the nature of genomic expansion, structural variation, and chromosomal rearrangements from rice and Brachypodium, to diploids in the tribe Aveneae (e.g., Avena longiglumis, 2n = 2x = 14; 3,961 Mb assembled to 3,850 Mb in chromosomes).

RESULTS: Most of the Avena chromosome arms show relatively uniform expansion over the 10-fold to 15-fold genome-size increase. Apart from non-coding sequence diversification and accumulation around the centromeres, blocks of genes are not interspersed with blocks of repeats, even in subterminal regions. As in the tribe Triticeae, blocks of conserved synteny are seen between the analyzed species with chromosome fusion, fission, and nesting (insertion) events showing deep evolutionary conservation of chromosome structure during genomic expansion. Unexpectedly, the terminal gene-rich chromosomal segments (representing about 50 Mb) show translocations between chromosomes during speciation, with homogenization of genome-specific repetitive elements within the tribe Aveneae. Newly-formed intergenomic translocations of similar extent are found in the hexaploid A. sativa.

CONCLUSIONS: The study provides insight into evolutionary mechanisms and speciation in the BOP clade, which is valuable for measurement of biodiversity, development of a clade-wide pangenome, and exploitation of genomic diversity through breeding programs in Poaceae.}, } @article {pmid38062371, year = {2023}, author = {Chenhaka, LH and Van Wyk, DAB and Mienie, C and Bezuidenhout, CC and Lekota, KE}, title = {The phylogenomic landscape of extended-spectrum β-lactamase producing Citrobacter species isolated from surface water.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {755}, pmid = {38062371}, issn = {1471-2164}, abstract = {BACKGROUND: Citrobacter species are Gram-negative opportunistic pathogens commonly reported in nosocomial-acquired infections. This study characterised four Citrobacter species that were isolated from surface water in the North West Province, South Africa.

RESULTS: Phenotypic antimicrobial susceptibility profiles of the isolates demonstrated their ability to produce the extended-spectrum β-lactamase (ESBL). Whole genomes were sequenced to profile antibiotic resistance and virulence genes, as well as mobile genetic elements. In silico taxonomic identification was conducted by using multi-locus sequence typing and average nucleotide identity. A pangenome was used to determine the phylogenomic landscape of the Citrobacter species by using 109 publicly available genomes. The strains S21 and S23 were identified as C. braakii, while strains S24 and S25 were C. murliniae and C. portucalensis, respectively. Comparative genomics and sequenced genomes of the ESBL-producing isolates consisted of n = 91; 83% Citrobacter species in which bla-CMY-101 (n = 19; 32,2%) and bla-CMY-59 (n = 12; 38,7%) were prevalent in C. braakii, and C. portucalensis strains, respectively. Macrolide (acrAB-TolC, and mdtG) and aminoglycoside (acrD) efflux pumps genes were identified in the four sequenced Citrobacter spp. isolates. The quinolone resistance gene, qnrB13, was exclusive to the C. portucalensis S25 strain. In silico analysis detected plasmid replicon types IncHI1A, IncP, and Col(VCM04) in C. murliniae S24 and C. portucalensis S25, respectively. These potentially facilitate the T4SS secretion system in Citrobacter species. In this study, the C. braakii genomes could be distinguished from C. murliniae and C. portucalensis on the basis of gene encoding for cell surface localisation of the CPS (vexC) and identification of genes involved in capsule polymer synthesis (tviB and tviE). A cluster for the salmochelin siderophore system (iro-BCDEN) was found in C. murliniae S24. This is important when it comes to the pathogenicity pathway that confers an advantage in colonisation.

CONCLUSIONS: The emerging and genomic landscapes of these ESBL-producing Citrobacter species are of significant concern due to their dissemination potential in freshwater systems. The presence of these ESBL and multidrug-resistant (MDR) pathogens in aquatic environments is of One Health importance, since they potentially impact the clinical domain, that is, in terms of human health and the agricultural domain, that is, in terms of animal health and food production as well as the environmental domain.}, } @article {pmid38062354, year = {2023}, author = {Hochstedler-Kramer, BR and Ene, A and Putonti, C and Wolfe, AJ}, title = {Comparative genomic analysis of clinical Enterococcus faecalis distinguishes strains isolated from the bladder.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {752}, pmid = {38062354}, issn = {1471-2164}, support = {U2CDK129917 and TL1DK132769//National Institute of Diabetes and Digestive Kidney Diseases of the National Institutes of Health/ ; }, abstract = {BACKGROUND: Enterococcus faecalis is the most commonly isolated enterococcal species in clinical infection. This bacterium is notorious for its ability to share genetic content within and outside of its species. With this increased proficiency for horizontal gene transfer, tremendous genomic diversity within this species has been identified. Many researchers have hypothesized E. faecalis exhibits niche adaptation to establish infections or colonize various parts of the human body. Here, we hypothesize that E. faecalis strains isolated from the human bladder will carry unique genomic content compared to clinical strains isolated from other sources.

RESULTS: This analysis includes comparison of 111 E. faecalis genomes isolated from bladder, urogenital, blood, and fecal samples. Phylogenomic comparison shows no association between isolation source and lineage; however, accessory genome comparison differentiates blood and bladder genomes. Further gene enrichment analysis identifies gene functions, virulence factors, antibiotic resistance genes, and plasmid-associated genes that are enriched or rare in bladder genomes compared to urogenital, blood, and fecal genomes. Using these findings as training data and 682 publicly available genomes as test data, machine learning classifiers successfully distinguished between bladder and non-bladder strains with high accuracy. Genes identified as important for this differentiation were often related to transposable elements and phage, including 3 prophage species found almost exclusively in bladder and urogenital genomes.

CONCLUSIONS: E. faecalis strains isolated from the bladder contain unique genomic content when compared to strains isolated from other body sites. This genomic diversity is most likely due to horizontal gene transfer, as evidenced by lack of phylogenomic clustering and enrichment of transposable elements and prophages. Investigation into how these enriched genes influence host-microbe interactions may elucidate gene functions required for successful bladder colonization and disease establishment.}, } @article {pmid38059630, year = {2023}, author = {Allegretti, YH and Yamaji, R and Adams-Sapper, S and Riley, LW}, title = {Genetic features of antimicrobial drug-susceptible extraintestinal pathogenic Escherichia coli pandemic sequence type 95.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0418922}, doi = {10.1128/spectrum.04189-22}, pmid = {38059630}, issn = {2165-0497}, abstract = {Despite the increasing prevalence of antibiotic-resistant Escherichia coli strains that cause urinary tract and bloodstream infections, a major pandemic lineage of extraintestinal pathogenic E. coli (ExPEC) ST95 has a comparatively low frequency of drug resistance. We compared the genomes of 1,749 ST95 isolates to identify genetic features that may explain why most strains of ST95 resist becoming drug-resistant. 
Identification of such genomic features could contribute to the development of novel strategies to prevent the spread of antibiotic-resistant genes and devise new measures to control antibiotic-resistant infections.}, } @article {pmid38057566, year = {2023}, author = {Zhu, F and Yin, ZT and Zhao, QS and Sun, YX and Jie, YC and Smith, J and Yang, YZ and Burt, DW and Hincke, M and Zhang, ZD and Yuan, MD and Kaufman, J and Sun, CJ and Li, JY and Shao, LW and Yang, N and Hou, ZC}, title = {A chromosome-level genome assembly for the Silkie chicken resolves complete sequences for key chicken metabolic, reproductive, and immunity genes.}, journal = {Communications biology}, volume = {6}, number = {1}, pages = {1233}, pmid = {38057566}, issn = {2399-3642}, abstract = {A set of high-quality pan-genomes would help identify important genes that are still hidden/incomplete in bird reference genomes. In an attempt to address these issues, we have assembled a de novo chromosome-level reference genome of the Silkie (Gallus gallus domesticus), which is an important avian model for unique traits, like fibromelanosis, with unclear genetic foundation. This Silkie genome includes the complete genomic sequences of well-known, but unresolved, evolutionarily, endocrinologically, and immunologically important genes, including leptin, ovocleidin-17, and tumor-necrosis factor-α. The gap-less and manually annotated MHC (major histocompatibility complex) region possesses 38 recently identified genes, with differentially regulated genes recovered in response to pathogen challenges. We also provide whole-genome methylation and genetic variation maps, and resolve a complex genetic region that may contribute to fibromelanosis in these animals. Finally, we experimentally show leptin binding to the identified leptin receptor in chicken, confirming an active leptin ligand-receptor system. 
The Silkie genome assembly not only provides a rich data resource for avian genome studies, but also lays a foundation for further functional validation of resolved genes.}, } @article {pmid38053559, year = {2023}, author = {Esteves, MAC and Viana, AS and Viçosa, GN and Botelho, AMN and Moustafa, AM and Mansoldo, FRP and Ferreira, ALP and Vermelho, AB and Ferreira-Carvalho, BT and Planet, PJ and Figueiredo, AMS}, title = {RdJ detection tests to identify a unique MRSA clone of ST105-SCCmecII lineage and its variants disseminated in the metropolitan region of Rio de Janeiro.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1275918}, pmid = {38053559}, issn = {1664-302X}, abstract = {Hospital bloodstream infection (BSI) caused by methicillin-resistant Staphylococcus aureus (MRSA) is a major cause of morbidity and mortality and is frequently related to invasive procedures and medically complex patients. An important feature of MRSA is the clonal structure of its population. Specific MRSA clones may differ in their pathogenic, epidemiological, and antimicrobial resistance profiles. Whole-genome sequencing is currently the most robust and discriminatory technique for tracking hypervirulent/well-adapted MRSA clones. However, it remains an expensive and time-consuming technique that requires specialized personnel. In this work, we describe a pangenome protocol, based on binary matrix (1,0) of open reading frames (ORFs), that can be used to quickly find diagnostic, apomorphic sequence mutations that can serve as biomarkers. We use this technique to create a diagnostic screen for MRSA isolates circulating in the Rio de Janeiro metropolitan area, the RdJ clone, which is prevalent in BSI. The method described here has 100% specificity and sensitivity, eliminating the need to use genomic sequencing for clonal identification. 
The protocol used is relatively simple and all the steps, formulas and commands used are described in this work, such that this strategy can also be used to identify other MRSA clones and even clones from other bacterial species.}, } @article {pmid38049764, year = {2023}, author = {Tian, X and Teo, WFA and Wee, WY and Yang, Y and Ahmed, H and Jakubovics, NS and Choo, SW and Tan, GYA}, title = {Genome characterization and taxonomy of Actinomyces acetigenes sp. nov., and Actinomyces stomatis sp. nov., previously isolated from the human oral cavity.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {734}, pmid = {38049764}, issn = {1471-2164}, support = {WB20211227000125//Wenzhou Municipal Key Laboratory for Applied Biomedical and the Biopharmaceutical Informatics/ ; WB20210429000008//Zhejiang Bioinformatics International Science and Technology Cooperation Center at Wenzhou-Kean University/ ; 5000105//The high-level talent recruitment program for academic and research platform construction from Wenzhou-Kean University/ ; }, abstract = {BACKGROUND: Actinomyces strains are commonly found as part of the normal microflora on human tissue surfaces, including the oropharynx, gastrointestinal tract, and female genital tract. Understanding the diversity and characterization of Actinomyces species is crucial for human health, as they play an important role in dental plaque formation and biofilm-related infections. Two Actinomyces strains ATCC 49340\textsuperscript{T} and ATCC 51655\textsuperscript{T} have been utilized in various studies, but their accurate species classification and description remain unresolved.

RESULTS: To investigate the genomic properties and taxonomic status of these strains, we employed both 16S rRNA Sanger sequencing and whole-genome sequencing using the Illumina HiSeq X Ten platform with PE151 (paired-end) sequencing. Our analyses revealed that the draft genome of Actinomyces acetigenes ATCC 49340\textsuperscript{T} was 3.27 Mbp with a 68.0% GC content, and Actinomyces stomatis ATCC 51655\textsuperscript{T} has a genome size of 3.08 Mbp with a 68.1% GC content. Multi-locus (atpA, rpoB, pgi, metG, gltA, gyrA, and core genome SNPs) sequence analysis supported the phylogenetic placement of strains ATCC 51655\textsuperscript{T} and ATCC 49340\textsuperscript{T} as independent lineages. Digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI), and average amino acid identity (AAI) analyses indicated that both strains represented novel Actinomyces species, with values below the threshold for species demarcation (70% dDDH, 95% ANI and AAI). Pangenome analysis identified 5,731 gene clusters with strains ATCC 49340\textsuperscript{T} and ATCC 51655\textsuperscript{T} possessing 1,515 and 1,518 unique gene clusters, respectively. Additionally, genomic islands (GIs) prediction uncovered 24 putative GIs in strain ATCC 49340\textsuperscript{T} and 16 in strain ATCC 51655\textsuperscript{T}, contributing to their genetic diversity and potential adaptive capabilities. Pathogenicity analysis highlighted the potential human pathogenicity risk associated with both strains, with several virulence-associated factors identified. CRISPR-Cas analysis exposed the presence of CRISPR and Cas genes in both strains, indicating these strains might evolve a robust defense mechanism against them.

CONCLUSION: This study supports the classification of strains ATCC 49340\textsuperscript{T} and ATCC 51655\textsuperscript{T} as novel species within the Actinomyces, in which the name Actinomyces acetigenes sp. nov. (type strain ATCC 49340\textsuperscript{T} = VPI D163E-3\textsuperscript{T} = CCUG 34286\textsuperscript{T} = CCUG 35339\textsuperscript{T}) and Actinomyces stomatis sp. nov. (type strain ATCC 51655\textsuperscript{T} = PK606\textsuperscript{T} = CCUG 33930\textsuperscript{T}) are proposed.}, } @article {pmid38048088, year = {2023}, author = {Chai, K and Chen, S and Wang, P and Kong, W and Ma, X and Zhang, X}, title = {Multiomics Analysis Reveals the Genetic Basis of Volatile Terpenoid Formation in Oolong Tea.}, journal = {Journal of agricultural and food chemistry}, volume = {}, number = {}, pages = {}, doi = {10.1021/acs.jafc.3c06762}, pmid = {38048088}, issn = {1520-5118}, abstract = {Oolong tea has gained great popularity in China due to its pleasant floral and fruity aromas. Although numerous studies have investigated the aroma differences across various tea cultivars, the genetic mechanism is unclear. This study performed multiomics analysis of three varieties suitable for oolong tea and three others with different processing suitability. Our analysis revealed that oolong tea varieties contained higher levels of cadinane sesquiterpenoids. PanTFBS was developed to identify variants of transcription factor binding sites (TFBSs). We found that the CsDCS gene had two TFBS variants in the promoter sequence and a single nucleotide polymorphism (SNP) in the coding sequence. Integrating data on genetic variations, gene expression, and protein-binding sites indicated that CsDCS might be a pivotal gene involved in the biosynthesis of cadinane sesquiterpenoids. 
These findings advance our understanding of the genetic factors involved in the aroma formation of oolong tea and offer insights into the enhancement of tea aroma.}, } @article {pmid38047471, year = {2023}, author = {Kumar, K and Barbora, L and Moholkar, VS}, title = {Genomic insights into clostridia in bioenergy production: Comparison of metabolic capabilities and evolutionary relationships.}, journal = {Biotechnology and bioengineering}, volume = {}, number = {}, pages = {}, doi = {10.1002/bit.28610}, pmid = {38047471}, issn = {1097-0290}, abstract = {Bacteria from diverse genera, including Acetivibrio, Bacillus, Cellulosilyticum, Clostridium, Desulfotomaculum, Lachnoclostridium, Moorella, Ruminiclostridium, and Thermoanaerobacterium, have attracted significant attention due to their versatile metabolic capabilities encompassing acetogenic, cellulolytic, and C1-metabolic properties, and acetone-butanol-ethanol fermentation. Despite their biotechnological significance, a comprehensive understanding of clostridial physiology and evolution has remained elusive. This study reports an extensive comparative genomic analysis of 48 fully sequenced bacterial genomes from these genera. Our investigation, encompassing pan-genomic analysis, central carbon metabolism comparison, exploration of general genome features, and in-depth scrutiny of Cluster of Orthologous Groups genes, has established a holistic whole-genome-based phylogenetic framework. We have classified these strains into acetogenic, butanol-producing, cellulolytic, CO2-fixating, chemo(litho/organo)trophic, and heterotrophic categories, often exhibiting overlaps. Key outcomes include the identification of misclassified species and the revelation of insights into metabolic features, energy conservation, substrate utilization, stress responses, and regulatory mechanisms. These findings can provide guidance for the development of efficient microbial systems for sustainable bioenergy production. 
Furthermore, by addressing fundamental questions regarding genetic relationships, conserved genomic features, pivotal enzymes, and essential genes, this study has also contributed to our comprehension of clostridial biology, evolution, and their shared metabolic potential.}, } @article {pmid38046854, year = {2023}, author = {Zhang, X and Chen, Y and Wang, L and Yuan, Y and Fang, M and Shi, L and Lu, R and Comes, HP and Ma, Y and Chen, Y and Huang, G and Zhou, Y and Zheng, Z and Qiu, Y}, title = {Pangenome of water caltrop reveals structural variations and asymmetric subgenome divergence after allopolyploidization.}, journal = {Horticulture research}, volume = {10}, number = {11}, pages = {uhad203}, pmid = {38046854}, issn = {2662-6810}, abstract = {Water caltrop (Trapa spp., Lythraceae) is a traditional but currently underutilized non-cereal crop. Here, we generated chromosome-level genome assemblies for the two diploid progenitors of allotetraploid Trapa natans (4x, AABB), i.e., diploid T. natans (2x, AA) and Trapa incisa (2x, BB). In conjunction with four published (sub)genomes of Trapa, we used gene-based and graph-based pangenomic approaches and a pangenomic transposable element (TE) library to develop Trapa genomic resources. The pangenome displayed substantial gene-content variation with dispensable and private gene clusters occupying a large proportion (51.95%) of the total cluster sets in the six (sub)genomes. Genotyping of presence-absence variation (PAVs) identified 40 453 PAVs associated with 2570 genes specific to A- or B-lineages, of which 1428 were differentially expressed, and were enriched in organ development process, organic substance metabolic process and response to stimulus. Comparative genome analyses showed that the allotetraploid T. natans underwent asymmetric subgenome divergence, with the B-subgenome being more dominant than the A-subgenome. 
Multiple factors, including PAVs, asymmetrical amplification of TEs, homeologous exchanges (HEs), and homeolog expression divergence, together affected genome evolution after polyploidization. Overall, this study sheds light on the genome architecture and evolution of Trapa, and facilitates its functional genomic studies and breeding program.}, } @article {pmid38045253, year = {2023}, author = {Salamzade, R and Kalan, LR}, title = {skDER: microbial genome dereplication approaches for comparative and metagenomic applications.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.09.27.559801}, pmid = {38045253}, abstract = {skDER (https://github.com/raufs/skDER) combines recent advances to efficiently estimate average nucleotide identity (ANI) between thousands of microbial genomes by skani [1] with two low-memory methods for genomic dereplication. The first method implements a dynamic algorithm to determine a concise set of representative genomes. This approach is well-suited for selecting reference genomes to align metagenomic reads onto for tracking strain presence across related microbiome samples. This is because fewer representative genomes should alleviate the concern that reads belonging to the same strain get falsely partitioned across closely related genomes. The other method, which uses a greedy approach, is better suited for use in comparative genomics, where users might be overwhelmed with the high number of genomes available for certain taxa and aim to reduce redundancy and, therefore, computational requirements for downstream analytics. This method selects a larger number of representative genomes to comprehensively sample the pangenome space for the taxon of interest. 
To further aid usage for comparative genomics studies, skDER also features an option to automatically download genomes classified as a particular species or genus in the Genome Taxonomy Database [2-4] and we provide precomputed representative genomes for commonly studied bacterial taxa [5] .}, } @article {pmid38040628, year = {2023}, author = {Liu, X and Wu, Z and Hu, T and Lin, X and Liang, H and Li, W and Jin, X and Xiao, L and Fang, X and Zou, Y}, title = {Comparative genomic analysis reveals niche adaption of Lactobacillus acidophilus.}, journal = {Journal of applied microbiology}, volume = {}, number = {}, pages = {}, doi = {10.1093/jambio/lxad287}, pmid = {38040628}, issn = {1365-2672}, abstract = {AIMS: Lactobacillus acidophilus has been extensively applied in plentiful probiotic products. Although several studies have been performed to investigate the beneficial characteristics and genome function of L. acidophilus, comparative genomic analysis remains scarce. In this study, we collected 74 L. acidophilus genomes from our gut bacterial genome collection and the public database and conducted a comprehensive comparative genomic analysis.

METHODS AND RESULTS: This study revealed the potential correlation of the genomic diversity and niche adaptation of L. acidophilus from different perspectives. The pan-genome of L. acidophilus was found to be open, with metabolism, information storage and processing genes mainly distributed in the core genome. Phage- and peptidase-associated genes were found in the genome of the specificity of animal-derived strains, which were related to adaptation of animal gut. SNP analysis showed the differences of the utilization of vitamin B12 in cellular of L. acidophilus strains from animal gut and others.

CONCLUSIONS: This work provides new insights for the genomic diversity analysis of Lactobacillus acidophilus and uncovers the ecological adaptation of the specific strains.}, } @article {pmid38037131, year = {2023}, author = {Andreace, F and Lechat, P and Dufresne, Y and Chikhi, R}, title = {Comparing methods for constructing and representing human pangenome graphs.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {274}, pmid = {38037131}, issn = {1474-760X}, support = {ANR-22-CE45-0007//ANR Full-RNA/ ; ANR-19-CE45-0008//SeqDigger/ ; PIA/ANR16-CONV-0005//Inception/ ; ANR-19-P3IA-0001//PRAIRIE/ ; 956229//H2020 Marie Skłodowska-Curie Actions/ ; 872539//H2020 Marie Skłodowska-Curie Actions/ ; }, abstract = {BACKGROUND: As a single reference genome cannot possibly represent all the variation present across human individuals, pangenome graphs have been introduced to incorporate population diversity within a wide range of genomic analyses. Several data structures have been proposed for representing collections of genomes as pangenomes, in particular graphs.

RESULTS: In this work, we collect all publicly available high-quality human haplotypes and construct the largest human pangenome graphs to date, incorporating 52 individuals in addition to two synthetic references (CHM13 and GRCh38). We build variation graphs and de Bruijn graphs of this collection using five of the state-of-the-art tools: Bifrost, mdbg, Minigraph, Minigraph-Cactus and pggb. We examine differences in the way each of these tools represents variations between input sequences, both in terms of overall graph structure and representation of specific genetic loci.

CONCLUSION: This work sheds light on key differences between pangenome graph representations, informing end-users on how to select the most appropriate graph type for their application.}, } @article {pmid38036791, year = {2023}, author = {Chen, J and Liu, Y and Liu, M and Guo, W and Wang, Y and He, Q and Chen, W and Liao, Y and Zhang, W and Gao, Y and Dong, K and Ren, R and Yang, T and Zhang, L and Qi, M and Li, Z and Zhao, M and Wang, H and Wang, J and Qiao, Z and Li, H and Jiang, Y and Liu, G and Song, X and Deng, Y and Li, H and Yan, F and Dong, Y and Li, Q and Li, T and Yang, W and Cui, J and Wang, H and Zhou, Y and Zhang, X and Jia, G and Lu, P and Zhi, H and Tang, S and Diao, X}, title = {Pangenome analysis reveals genomic variations associated with domestication traits in broomcorn millet.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {38036791}, issn = {1546-1718}, abstract = {Broomcorn millet (Panicum miliaceum L.) is an orphan crop with the potential to improve cereal production and quality, and ensure food security. Here we present the genetic variations, population structure and diversity of a diverse worldwide collection of 516 broomcorn millet genomes. Population analysis indicated that the domesticated broomcorn millet originated from its wild progenitor in China. We then constructed a graph-based pangenome of broomcorn millet based on long-read de novo genome assemblies of 32 representative accessions. Our analysis revealed that the structural variations were highly associated with transposable elements, which influenced gene expression when located in the coding or regulatory regions. We also identified 139 loci associated with 31 key domestication and agronomic traits, including candidate genes and superior haplotypes, such as LG1, for panicle architecture. 
Thus, the study's findings provide foundational resources for developing genomics-assisted breeding programs in broomcorn millet.}, } @article {pmid38035008, year = {2023}, author = {Muhammad, SA and Guo, J and Noor, K and Mustafa, A and Amjad, A and Bai, B}, title = {Pangenomic and immunoinformatics based analysis of Nipah virus revealed {CD4\textsuperscript{+}} and {CD8\textsuperscript{+}} T-Cell epitopes as potential vaccine candidates.}, journal = {Frontiers in pharmacology}, volume = {14}, number = {}, pages = {1290436}, doi = {10.3389/fphar.2023.1290436}, pmid = {38035008}, issn = {1663-9812}, abstract = {Introduction: Nipah (NiV) is the zoonotic deadly bat-borne virus that causes neurological and respiratory infections which ultimately lead to death. There are 706 infected cases reported up till now especially in Asia, out of which 409 patients died. There is no vaccine and effective treatment available for NiV infections and we have to timely design such strategies as world could not bear another pandemic situation. Methods: In this study, we screened viral proteins of NiV strains based on pangenomics analysis, antigenicity, molecular weight, and sub-cellular localization. The immunoproteomics based approach was used to predict T-cell epitopes of MHC class-I and II as potential vaccine candidates. These epitopes are capable to activate CD4\textsuperscript{+}, CD8\textsuperscript{+}, and T-cell dependent B-lymphocytes. Results: The two surface proteins including fusion glycoprotein (F) and attachment glycoprotein (G) are antigenic with molecular weights of 60 kDa and 67 kDa respectively. Three epitopes of F protein (VNYNSEGIA, PNFILVRNT, and IKMIPNVSN) were ranked and selected based on the binding affinity with MHC class-I, and 3 epitopes (VILNKRYYS, ILVRNTLIS, and VKLQETAEK) with MHC-II molecules. Similarly, for G protein, 3 epitopes each for MHC-I (GKYDKVMPY, ILKPKLISY, and KNKIWCISL) and MHC-II (LRNIEKGKY, FLIDRINWI, and FLLKNKIWC) with substantial binding energies were predicted. 
Based on the physicochemical properties, all these epitopes are non-toxic, hydrophilic, and stable. Conclusion: Our vaccinomics and system-level investigation could help to trigger the host immune system to prevent NiV infection.}, } @article {pmid38033569, year = {2023}, author = {Feng, L and Zhang, M and Fan, Z}, title = {Population genomic analysis of clinical ST15 Klebsiella pneumoniae strains in China.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1272173}, doi = {10.3389/fmicb.2023.1272173}, pmid = {38033569}, issn = {1664-302X}, abstract = {ST15 Klebsiella pneumoniae (Kpn) is a growing public health concern in China and worldwide, yet its genomic and evolutionary dynamics in this region remain poorly understood. This study comprehensively elucidates the population genomics of ST15 Kpn in China by analyzing 287 publicly available genomes. The proportion of the genomes increased sharply from 2012 to 2021, and 92.3% of them were collected from the Yangtze River Delta (YRD) region of eastern China. Carbapenemase genes, including OXA-232, KPC-2, and NDM, were detected in 91.6% of the studied genomes, and 69.2% of which were multidrug resistant (MDR) and hypervirulent (hv). Phylogenetic analysis revealed four clades, C1 (KL112, 59.2%), C2 (mainly KL19, 30.7%), C3 (KL48, 0.7%) and C4 (KL24, 9.4%). C1 appeared in 2007 and was OXA-232-producing and hv; C2 and C4 appeared between 2005 and 2007, and both were KPC-2-producing but with different levels of virulence. Transmission clustering detected 86.1% (n = 247) of the enrolled strains were grouped into 55 clusters (2-159 strains) and C1 was more transmissible than others. Plasmid profiling revealed 88 plasmid clusters (PCs) that were highly heterogeneous both between and within clades. 60.2% (n = 53) of the PCs carrying AMR genes and 7 of which also harbored VFs. KPC-2, NDM and OXA-232 were distributed across 14, 4 and 1 PCs, respectively. 
The MDR-hv strains all carried one of two homologous PCs encoding iucABCD and rmpA2 genes. Pangenome analysis revealed two major coinciding accessory components predominantly located on plasmids. One component, associated with KPC-2, encompassed 15 additional AMR genes, while the other, linked to OXA-232, involved seven more AMR genes. This study provides essential insights into the genomic evolution of the high-risk ST15 CP-Kpn strains in China and warrants rigorous monitoring.}, } @article {pmid38029170, year = {2023}, author = {Wu, F and Zhang, T and Wu, Q and Li, X and Zhang, M and Luo, X and Zhang, Y and Lu, R}, title = {Complete genome sequence and comparative analysis of a Vibrio vulnificus strain isolated from a clinical patient.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1240835}, doi = {10.3389/fmicb.2023.1240835}, pmid = {38029170}, issn = {1664-302X}, abstract = {Vibrio vulnificus is an opportunistic, global pathogen that naturally inhabits sea water and is responsible for most vibriosis-related deaths. We investigated the genetic characteristics of V. vulnificus isolated from the clinical blood culture specimen of a patient with hepatitis B virus cirrhosis in 2018 (named as V. vulnificus VV2018) by whole genome sequencing (WGS). VV2018 belonged to a novel sequencing type 620 (ST620) and comprised two circular chromosomes, containing 4,389 potential coding sequences (CDSs) and 152 RNA genes. The phylogenetic tree of single nucleotide polymorphisms (SNPs) using 26 representative genomes revealed that VV2018 grouped with two other V. vulnificus strains isolated from humans. The pan-genome of V. vulnificus was constructed using 26 representative genomes to elucidate their genetic diversity, evolutionary characteristics, and virulence and antibiotic resistance profiles. 
The pan-genome analysis revealed that VV2018 shared a total of 3,016 core genes (≥99% presence), including 115 core virulence factors (VFs) and 5 core antibiotic resistance-related genes, and 309 soft core genes (≥95 and <99% presence) with 25 other V. vulnificus strains. The varG gene might account for the cefazolin resistance, and comparative analysis of the genetic context of varG revealed that two genes upstream and downstream of varG were conserved. The glycosylation (pgl) like genes were found in VV2018 compared with Pgl-related proteins in Neisseria that might affect the adherence of the strain in hosts. The comparative analysis of VV2018 would contribute to a better understanding of the virulence and antibiotic resistance profiles of V. vulnificus. Meanwhile much work remains to be done to better understand the function of pgl-like genes in V. vulnificus.}, } @article {pmid38029151, year = {2023}, author = {Cai, X and Peng, Y and Yang, G and Feng, L and Tian, X and Huang, P and Mao, Y and Xu, L}, title = {Populational genomic insights of Paraclostridium bifermentans as an emerging human pathogen.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1293206}, doi = {10.3389/fmicb.2023.1293206}, pmid = {38029151}, issn = {1664-302X}, abstract = {Paraclostridium bifermentans (P.b) is an emerging human pathogen that is phylogenomically close to Paeniclostridium sordellii (P.s), while their populational genomic features and virulence capacity remain understudied. Here, we performed comparative genomic analyses of P.b and compared their pan-genomic features and virulence coding profiles to those of P.s. Our results revealed that P.b has a more plastic pangenome, a larger genome size, and a higher GC content than P.s. Interestingly, the P.b and P.s share similar core-genomic functions, but P.b encodes more functions in nutrient metabolism and energy conversion and fewer functions in host defense in their accessory-genomes. 
The P.b may initiate extracellular infection processes similar to those of P.s and Clostridium perfringens by encoding three toxin homologs (i.e., microbial collagenase, thiol-activated cytolysin, phospholipase C, which are involved in extracellular matrices degradation and membrane damaging) in their core-genomes. However, P.b is less toxic than the P.s by encoding fewer secretion toxins in the core-genome and fewer lethal toxins in the accessory-genome. Notably, P.b carries more toxins genes in their accessory-genomes, particularly those of plasmid origin. Moreover, three within-species and highly conserved plasmid groups, encoding virulence, gene acquisition, and adaptation, were carried by 25-33% of P.b strains and clustered by isolation source rather than geography. This study characterized the pan-genomic virulence features of P.b for the first time, and revealed that P. bifermentans is an emerging pathogen that can threaten human health in many aspects, emphasizing the importance of phenotypic and genomic characterizations of in situ clinical isolates.}, } @article {pmid38029109, year = {2023}, author = {Crosby, KC and Rojas, M and Sharma, P and Johnson, MA and Mazloom, R and Kvitko, BH and Smits, THM and Venter, SN and Coutinho, TA and Heath, LS and Palmer, M and Vinatzer, BA}, title = {Genomic delineation and description of species and within-species lineages in the genus Pantoea.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1254999}, doi = {10.3389/fmicb.2023.1254999}, pmid = {38029109}, issn = {1664-302X}, abstract = {As the name of the genus Pantoea ("of all sorts and sources") suggests, this genus includes bacteria with a wide range of provenances, including plants, animals, soils, components of the water cycle, and humans. 
Some members of the genus are pathogenic to plants, and some are suspected to be opportunistic human pathogens; while others are used as microbial pesticides or show promise in biotechnological applications. During its taxonomic history, the genus and its species have seen many revisions. However, evolutionary and comparative genomics studies have started to provide a solid foundation for a more stable taxonomy. To move further toward this goal, we have built a 2,509-gene core genome tree of 437 public genome sequences representing the currently known diversity of the genus Pantoea. Clades were evaluated for being evolutionarily and ecologically significant by determining bootstrap support, gene content differences, and recent recombination events. These results were then integrated with genome metadata, published literature, descriptions of named species with standing in nomenclature, and circumscriptions of yet-unnamed species clusters, 15 of which we assigned names under the nascent SeqCode. Finally, genome-based circumscriptions and descriptions of each species and each significant genetic lineage within species were uploaded to the LINbase Web server so that newly sequenced genomes of isolates belonging to any of these groups could be precisely and accurately identified.}, } @article {pmid38029097, year = {2023}, author = {Shikov, AE and Merkushova, AV and Savina, IA and Nizhnikov, AA and Antonets, KS}, title = {The man, the plant, and the insect: shooting host specificity determinants in Serratia marcescens pangenome.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1211999}, doi = {10.3389/fmicb.2023.1211999}, pmid = {38029097}, issn = {1664-302X}, abstract = {INTRODUCTION: Serratia marcescens is most commonly known as an opportunistic pathogen causing nosocomial infections. 
It, however, was shown to infect a wide range of hosts apart from vertebrates such as insects or plants as well, being either pathogenic or growth-promoting for the latter. Despite being extensively studied in terms of virulence mechanisms during human infections, there has been little evidence of which factors determine S. marcescens host specificity. On that account, we analyzed S. marcescens pangenome to reveal possible specificity factors.

METHODS: We selected 73 high-quality genome assemblies of complete level and reconstructed the respective pangenome and reference phylogeny based on core genes alignment. To find an optimal pipeline, we tested current pangenomic tools and obtained several phylogenetic inferences. The pangenome was rich in its accessory component and was considered open according to the Heaps' law. We then applied the pangenome-wide associating method (pan-GWAS) and predicted positively associated gene clusters attributed to three host groups, namely, humans, insects, and plants.

RESULTS: According to the results, significant factors relating to human infections included transcriptional regulators, lipoproteins, ABC transporters, and membrane proteins. Host preference toward insects, in its turn, was associated with diverse enzymes, such as hydrolases, isochorismatase, and N-acetyltransferase with the latter possibly exerting a neurotoxic effect. Finally, plant infection may be conducted through type VI secretion systems and modulation of plant cell wall synthesis. Interestingly, factors associated with plants also included putative growth-promoting proteins like enzymes performing xenobiotic degradation and releasing ammonium ions. We also identified overrepresented functional annotations within the sets of specificity factors and found that their functional characteristics fell into separate clusters, thus, implying that host adaptation is represented by diverse functional pathways. Finally, we found that mobile genetic elements bore specificity determinants. In particular, prophages were mainly associated with factors related to humans, while genetic islands—with insects and plants, respectively.

DISCUSSION: In summary, functional enrichments coupled with pangenomic inferences allowed us to hypothesize that the respective host preference is carried out through distinct molecular mechanisms of virulence. To the best of our knowledge, the presented research is the first to identify specific genomic features of S. marcescens assemblies isolated from different hosts at the pangenomic level.}, } @article {pmid38028596, year = {2023}, author = {Kabata, F and Thaldar, D}, title = {The human genome as the common heritage of humanity.}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1282515}, doi = {10.3389/fgene.2023.1282515}, pmid = {38028596}, issn = {1664-8021}, abstract = {While debate on the international regulation of human genomic research remains unsettled, the Universal Declaration on the Human Genome and Human Rights, 1997 qualifies the human genome as "heritage of humankind" in a symbolic sense. Using document analysis this article assesses whether, how and to what extent the common heritage framework is relevant in regulation of human genomic research. The article traces the history of the Human Genome Project to reveal the international community's race against privatization of the human genome and its resulting qualification as the common heritage of humanity. Further, it reviews the archival records of UNESCO's International Bioethics Committee to discover the rationale for qualifying the human genome as common heritage of humankind. The article finds that the common heritage of mankind framework remains relevant to the application of the human genome at the collective level. However, the framework is at odds with the individual dimension of the human genome based on individual personality rights. 
The article thus argues that the right to benefit from scientific progress and its applications offers an alternative international regulatory framework for human genomic research.}, } @article {pmid38026211, year = {2023}, author = {Ghaly, TM and Rajabal, V and Penesyan, A and Coleman, NV and Paulsen, IT and Gillings, MR and Tetu, SG}, title = {Functional enrichment of integrons: Facilitators of antimicrobial resistance and niche adaptation.}, journal = {iScience}, volume = {26}, number = {11}, pages = {108301}, doi = {10.1016/j.isci.2023.108301}, pmid = {38026211}, issn = {2589-0042}, abstract = {Integrons are genetic elements, found among diverse bacteria and archaea, that capture and rearrange gene cassettes to rapidly generate genetic diversity and drive adaptation. Despite their broad taxonomic and geographic prevalence, and their role in microbial adaptation, the functions of gene cassettes remain poorly characterized. Here, using a combination of bioinformatic and experimental analyses, we examined the functional diversity of gene cassettes from different environments. We find that cassettes encode diverse antimicrobial resistance (AMR) determinants, including those conferring resistance to antibiotics currently in the developmental pipeline. Further, we find a subset of cassette functions is universally enriched relative to their broader metagenomes. These are largely involved in (a)biotic interactions, including AMR, phage defense, virulence, biodegradation, and stress tolerance. The remainder of functions are sample-specific, suggesting that they confer localised functions relevant to their microenvironment. 
Together, they comprise functional profiles different from bulk metagenomes, representing niche-adaptive components of the prokaryotic pangenome.}, } @article {pmid38023484, year = {2023}, author = {Yocca, AE and Platts, A and Alger, E and Teresi, S and Mengist, MF and Benevenuto, J and Ferrão, LFV and Jacobs, M and Babinski, M and Magallanes-Lundback, M and Bayer, P and Golicz, A and Humann, JL and Main, D and Espley, RV and Chagné, D and Albert, NW and Montanari, S and Vorsa, N and Polashock, J and Díaz-Garcia, L and Zalapa, J and Bassil, NV and Munoz, PR and Iorizzo, M and Edger, PP}, title = {Blueberry and cranberry pangenomes as a resource for future genetic studies and breeding efforts.}, journal = {Horticulture research}, volume = {10}, number = {11}, pages = {uhad202}, doi = {10.1093/hr/uhad202}, pmid = {38023484}, issn = {2662-6810}, abstract = {Domestication of cranberry and blueberry began in the United States in the early 1800s and 1900s, respectively, and in part owing to their flavors and health-promoting benefits are now cultivated and consumed worldwide. The industry continues to face a wide variety of production challenges (e.g. disease pressures), as well as a demand for higher-yielding cultivars with improved fruit quality characteristics. Unfortunately, molecular tools to help guide breeding efforts for these species have been relatively limited compared with those for other high-value crops. Here, we describe the construction and analysis of the first pangenome for both blueberry and cranberry. Our analysis of these pangenomes revealed both crops exhibit great genetic diversity, including the presence-absence variation of 48.4% genes in highbush blueberry and 47.0% genes in cranberry. 
Auxiliary genes, those not shared by all cultivars, are significantly enriched with molecular functions associated with disease resistance and the biosynthesis of specialized metabolites, including compounds previously associated with improving fruit quality traits. The discovery of thousands of genes, not present in the previous reference genomes for blueberry and cranberry, will serve as the basis of future research and as potential targets for future breeding efforts. The pangenome, as a multiple-sequence alignment, as well as individual annotated genomes, are publicly available for analysis on the Genome Database for Vaccinium—a curated and integrated web-based relational database. Lastly, the core-gene predictions from the pangenomes will serve useful to develop a community genotyping platform to guide future molecular breeding efforts across the family.}, } @article {pmid38017392, year = {2023}, author = {Jensen, MG and Svraka, L and Baez, E and Lund, M and Poehlein, A and Brüggemann, H}, title = {Species- and strain-level diversity of Corynebacteria isolated from human facial skin.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {366}, pmid = {38017392}, issn = {1471-2180}, support = {LF-OC-21-000826//LEO Fondet/ ; }, abstract = {BACKGROUND: Sequencing of the human skin microbiome revealed that Corynebacterium is an ubiquitous and abundant bacterial genus on human skin. Shotgun sequencing further highlighted the microbial "dark matter" of the skin microbiome, consisting of microorganisms, including corynebacterial species that were not cultivated and genome-sequenced so far. In this pilot project, facial human skin swabs of 13 persons were cultivated to selectively obtain corynebacteria. 54 isolates were collected and 15 of these were genome-sequenced and the pan-genome was determined. The strains were biochemically characterized and antibiotic susceptibility testing (AST) was performed.

RESULTS: Among the 15 sequenced strains, nine different corynebacterial species were found, including two so far undescribed species, tentatively named "Corynebacterium vikingii" and "Corynebacterium borealis", for which closed genome sequences were obtained. Strain variability beyond the species level was determined in biochemical tests, such as the variable presence of urease activity and the capacity to ferment different sugars. The ability to grow under anaerobic conditions on solid agar was found to be species-specific. AST revealed resistances to clindamycin in seven strains. A Corynebacterium pseudokroppenstedtii strain showed additional resistance towards beta-lactam and fluoroquinolone antibiotics; a chromosomally located 17 kb gene cluster with five antibiotic resistance genes was found in the closed genome of this strain.

CONCLUSIONS: Taken together, this pilot study identified an astonishing diversity of cutaneous corynebacterial species in a relatively small cohort and determined species- and strain-specific individualities regarding biochemical and resistance profiles. This further emphasizes the need for cultivation-based studies to be able to study these microorganisms in more detail, in particular regarding their host-interacting and, potentially, -beneficial and/or -detrimental properties.}, } @article {pmid38015202, year = {2023}, author = {Williams, AN and Ma, A and Croxen, MA and Demczuk, WHB and Martin, I and Tyrrell, GJ}, title = {Genomic analysis of Streptococcus pneumoniae serogroup 20 isolates in Alberta, Canada from 1993-2019.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001141}, pmid = {38015202}, issn = {2057-5858}, abstract = {In the province of Alberta, Canada, invasive disease caused by Streptococcus pneumoniae serogroup 20 (serotypes 20A/20B) has been increasing in incidence. Here, we characterize provincial invasive serogroup 20 isolates collected from 1993 to 2019 alongside invasive and non-invasive serogroup 20 isolates from the Global Pneumococcal Sequencing (GPS) Project collected from 1998 to 2015. Trends in clinical metadata and geographic location were evaluated, and serogroup 20 isolate genomes were subjected to molecular sequence typing, virulence and antimicrobial resistance factor mining, phylogenetic analysis and pangenome calculation. Two hundred and seventy-four serogroup 20 isolates from Alberta were sequenced, and analysed along with 95 GPS Project genomes. The majority of invasive Alberta serogroup 20 isolates were identified after 2007 in primarily middle-aged adults and typed predominantly as ST235, a sequence type that was rare among GPS Project isolates. Most Alberta isolates carried a full-length whaF capsular gene, suggestive of serotype 20B. 
All Alberta and GPS Project genomes carried molecular resistance determinants implicated in fluoroquinolone and macrolide resistance, with a few Alberta isolates exhibiting phenotypic resistance to azithromycin, clindamycin, erythromycin, tetracycline and trimethoprim-sulfamethoxazole, as well as non-susceptibility to tigecycline. All isolates carried multiple virulence factors including those involved in adherence, immune modulation and nutrient uptake, as well as exotoxins and exoenzymes. Phylogenetically, Alberta serogroup 20 isolates clustered with predominantly invasive GPS Project isolates from the USA, Israel, Brazil and Nepal. Overall, this study highlights the increasing incidence of invasive S. pneumoniae serogroup 20 disease in Alberta, Canada, and provides insights into the genetic and clinical characteristics of these isolates within a global context.}, } @article {pmid38014076, year = {2023}, author = {Ramsbottom, KA and Prakash, A and Riverol, YP and Camacho, OM and Sun, Z and Kundu, DJ and Bowler-Barnett, E and Martin, M and Fan, J and Chebotarov, D and McNally, KL and Deutsch, EW and Vizcaíno, JA and Jones, AR}, title = {A meta-analysis of rice phosphoproteomics data to understand variation in cell signalling across the rice pan-genome.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.11.17.567512}, pmid = {38014076}, abstract = {Phosphorylation is the most studied post-translational modification, and has multiple biological functions. In this study, we have re-analysed publicly available mass spectrometry proteomics datasets enriched for phosphopeptides from Asian rice (Oryza sativa). In total we identified 15,522 phosphosites on serine, threonine and tyrosine residues on rice proteins. We identified sequence motifs for phosphosites, and link motifs to enrichment of different biological processes, indicating different downstream regulation likely caused by different kinase groups. 
We cross-referenced phosphosites against the rice 3,000 genomes, to identify single amino acid variations (SAAVs) within or proximal to phosphosites that could cause loss of a site in a given rice variety. The data was clustered to identify groups of sites with similar patterns across rice family groups, for example those highly conserved in Japonica, but mostly absent in Aus type rice varieties - known to have different responses to drought. These resources can assist rice researchers to discover alleles with significantly different functional effects across rice varieties. The data has been loaded into UniProt Knowledge-Base - enabling researchers to visualise sites alongside other data on rice proteins e.g. structural models from AlphaFold2, PeptideAtlas and the PRIDE database - enabling visualisation of source evidence, including scores and supporting mass spectra.}, } @article {pmid38012560, year = {2023}, author = {Liu, H and Zhao, W and Hua, W and Liu, J}, title = {Correction: A large-scale population based organelle pan-genomes construction and phylogeny analysis reveal the genetic diversity and the evolutionary origins of chloroplast and mitochondrion in Brassica napus L.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {716}, pmid = {38012560}, issn = {1471-2164}, } @article {pmid38012347, year = {2023}, author = {Edwards, D and Batley, J}, title = {Teatime for pangenomics.}, journal = {Nature plants}, volume = {}, number = {}, pages = {}, pmid = {38012347}, issn = {2055-0278}, } @article {pmid38012346, year = {2023}, author = {Chen, S and Wang, P and Kong, W and Chai, K and Zhang, S and Yu, J and Wang, Y and Jiang, M and Lei, W and Chen, X and Wang, W and Gao, Y and Qu, S and Wang, F and Wang, Y and Zhang, Q and Gu, M and Fang, K and Ma, C and Sun, W and Ye, N and Wu, H and Zhang, X}, title = {Gene mining and genomics-assisted breeding empowered by the pangenome of tea plant Camellia sinensis.}, journal = {Nature plants}, volume = {}, 
number = {}, pages = {}, pmid = {38012346}, issn = {2055-0278}, support = {No. 32222019//National Natural Science Foundation of China (National Science Foundation of China)/ ; }, abstract = {Tea is one of the world's oldest crops and is cultivated to produce beverages with various flavours. Despite advances in sequencing technologies, the genetic mechanisms underlying key agronomic traits of tea remain unclear. In this study, we present a high-quality pangenome of 22 elite cultivars, representing broad genetic diversity in the species. Our analysis reveals that a recent long terminal repeat burst contributed nearly 20% of gene copies, introducing functional genetic variants that affect phenotypes such as leaf colour. Our graphical pangenome improves the efficiency of genome-wide association studies and allows the identification of key genes controlling bud flush timing. We also identified strong correlations between allelic variants and flavour-related chemistries. These findings deepen our understanding of the genetic basis of tea quality and provide valuable genomic resources to facilitate its genomics-assisted breeding.}, } @article {pmid37961504, year = {2023}, author = {Hong, A and Oliva, M and Köppl, D and Bannai, H and Boucher, C and Gagie, T}, title = {PFP-FM: An Accelerated FM-index.}, journal = {Research square}, volume = {}, number = {}, pages = {}, pmid = {37961504}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; }, abstract = {FM-indexes are a crucial data structure in DNA alignment, but searching with them usually takes at least one random access per character in the query pattern. Ferragina and Fischer [1] observed in 2007 that word-based indexes often use fewer random accesses than character-based indexes, and thus support faster searches. Since DNA lacks natural word-boundaries, however, it is necessary to parse it somehow before applying word-based FM-indexing. Last year, Deng et al. 
[2] proposed parsing genomic data by induced suffix sorting, and showed the resulting word-based FM-indexes support faster counting queries than standard FM-indexes when patterns are a few thousand characters or longer. In this paper we show that using prefix-free parsing—which takes parameters that let us tune the average length of the phrases—instead of induced suffix sorting, gives a significant speedup for patterns of only a few hundred characters. We implement our method and demonstrate it is between 3 and 18 times faster than competing methods on queries to GRCh38, and is consistently faster on queries made to 25,000, 50,000 and 100,000 SARS-CoV-2 genomes. Hence, it seems our method accelerates the performance of count over all state-of-the-art methods with a minor increase in the memory. The source code for PFP-FM is available at https://github.com/marco-oliva/afm.}, } @article {pmid38008766, year = {2023}, author = {Mackenzie, A and Norman, M and Gessese, M and Chen, C and Sørensen, C and Hovmøller, M and Ma, L and Forrest, K and Hickey, L and Bariana, H and Bansal, U and Periyannan, S}, title = {Wheat stripe rust resistance locus YR63 is a hot spot for evolution of defence genes - a pangenome discovery.}, journal = {BMC plant biology}, volume = {23}, number = {1}, pages = {590}, pmid = {38008766}, issn = {1471-2229}, abstract = {BACKGROUND: Stripe rust, caused by Puccinia striiformis f. sp. tritici (Pst), poses a threat to global wheat production. Deployment of widely effective resistance genes underpins management of this ongoing threat. This study focused on the mapping of stripe rust resistance gene YR63 from a Portuguese hexaploid wheat landrace AUS27955 of the Watkins Collection.

RESULTS: YR63 exhibits resistance to a broad spectrum of Pst races from Australia, Africa, Asia, Europe, Middle East and South America. It was mapped to the short arm of chromosome 7B, between two single nucleotide polymorphic (SNP) markers sunCS_YR63 and sunCS_67, positioned at 0.8 and 3.7 Mb, respectively, in the Chinese Spring genome assembly v2.1. We characterised YR63 locus using an integrated approach engaging targeted genotyping-by-sequencing (tGBS), mutagenesis, resistance gene enrichment and sequencing (MutRenSeq), RNA sequencing (RNASeq) and comparative genomic analysis with tetraploid (Zavitan and Svevo) and hexaploid (Chinese Spring) wheat genome references and 10+ hexaploid wheat genomes. YR63 is positioned at a hot spot enriched with multiple nucleotide-binding and leucine rich repeat (NLR) and kinase domain encoding genes, known widely for defence against pests and diseases in plants and animals. Detection of YR63 within these gene clusters is not possible through short-read sequencing due to high homology between members. However, using the sequence of a NLR member we were successful in detecting a closely linked SNP marker for YR63 and validated on a panel of Australian bread wheat, durum and triticale cultivars.

CONCLUSIONS: This study highlights YR63 as a valuable source for resistance against Pst in Australia and elsewhere. The closely linked SNP marker will facilitate rapid introgression of YR63 into elite cultivars through marker-assisted selection. The bottleneck of this study reinforces the necessity for a long-read sequencing such as PacBio or Oxford Nanopore based techniques for accurate detection of the underlying resistance gene when it is part of a large gene cluster.}, } @article {pmid38004814, year = {2023}, author = {Carter, MQ and Quiñones, B and He, X and Pham, A and Carychao, D and Cooley, MB and Lo, CC and Chain, PSG and Lindsey, RL and Bono, JL}, title = {Genomic and Phenotypic Characterization of Shiga Toxin-Producing Escherichia albertii Strains Isolated from Wild Birds in a Major Agricultural Region in California.}, journal = {Microorganisms}, volume = {11}, number = {11}, pages = {}, doi = {10.3390/microorganisms11112803}, pmid = {38004814}, issn = {2076-2607}, support = {USDA-ARS CRIS projects 2030-42000-049-00D, 2030-42000-052-000D, and 2030-42000-055-000D//United States Department of Agriculture/ ; }, abstract = {Escherichia albertii is an emerging foodborne pathogen. To better understand the pathogenesis and health risk of this pathogen, comparative genomics and phenotypic characterization were applied to assess the pathogenicity potential of E. albertii strains isolated from wild birds in a major agricultural region in California. Shiga toxin genes stx2f were present in all avian strains. Pangenome analyses of 20 complete genomes revealed a total of 11,249 genes, of which nearly 80% were accessory genes. Both core gene-based phylogenetic and accessory gene-based relatedness analyses consistently grouped the three stx2f-positive clinical strains with the five avian strains carrying ST7971. Among the three Stx2f-converting prophage integration sites identified, ssrA was the most common one. 
Besides the locus of enterocyte effacement and type three secretion system, the high pathogenicity island, OI-122, and type six secretion systems were identified. Substantial strain variation in virulence gene repertoire, Shiga toxin production, and cytotoxicity were revealed. Six avian strains exhibited significantly higher cytotoxicity than that of stx2f-positive E. coli, and three of them exhibited a comparable level of cytotoxicity with that of enterohemorrhagic E. coli outbreak strains, suggesting that wild birds could serve as a reservoir of E. albertii strains with great potential to cause severe diseases in humans.}, } @article {pmid38004763, year = {2023}, author = {Xue, M and Gao, Q and Yan, R and Liu, L and Wang, L and Wen, B and Wen, C}, title = {Comparative Genomic Analysis of Shrimp-Pathogenic Vibrio parahaemolyticus LC and Intraspecific Strains with Emphasis on Virulent Factors of Mobile Genetic Elements.}, journal = {Microorganisms}, volume = {11}, number = {11}, pages = {}, doi = {10.3390/microorganisms11112752}, pmid = {38004763}, issn = {2076-2607}, support = {32072995//National Natural Science Foundation of China/ ; K22218//Modern Seed Industry Park for Whiteleg Shrimp of Guangdong Province/ ; 004//Lianjiang Shrimp Aquaculture Group Co., Ltd., Guangdong, China/ ; }, abstract = {Vibrio parahaemolyticus exhibits severe pathogenicity in humans and animals worldwide. In this study, genome sequencing and comparative analyses were conducted for in-depth characterization of the virulence factor (VF) repertoire of V. parahaemolyticus strain LC, which presented significant virulence to shrimp Litopenaeus vannamei. Strain LC, harboring two circular chromosomes and three linear plasmids, demonstrated ≥98.14% average nucleotide identities with 31 publicly available V. parahaemolyticus genomes, including 13, 11, and 7 shrimp-, human-, and non-pathogenic strains, respectively. 
Phylogeny analysis based on dispensable genes of pan-genome clustered 11 out of 14 shrimp-pathogenic strains and 7 out of 11 clinical strains into two distinct clades, indicating the close association between host-specific pathogenicity and accessory genes. The VFDB database revealed that 150 VFs of LC were mainly associated with the secretion system, adherence, antiphagocytosis, chemotaxis, motility, and iron uptake, whereas no homologs of the typical pathogenic genes pirA, pirB, tdh, and trh were detected. Four genes, mshB, wbfT, wbfU, and wbtI, were identified in both types of pathogenic strains but were absent in non-pathogens. Notably, a unique cluster similar to Yen-Tc, which encodes an insecticidal toxin complex, and diverse toxin-antitoxin (TA) systems, were identified on the mobile genetic elements (MGEs) of LC. Conclusively, in addition to the common VFs, various unique MGE-borne VFs, including the Yen-Tc cluster, TA components, and multiple chromosome-encoded chitinase genes, may contribute to the full spectrum of LC virulence. Moreover, V. parahaemolyticus demonstrates host-specific virulence, which potentially drives the origin and spread of pathogenic factors.}, } @article {pmid38004738, year = {2023}, author = {Wang, C and Mao, L and Bao, G and Zhu, H}, title = {Pan-Genome Analyses of the Genus Cohnella and Proposal of the Novel Species Cohnella silvisoli sp. 
nov., Isolated from Forest Soil.}, journal = {Microorganisms}, volume = {11}, number = {11}, pages = {}, doi = {10.3390/microorganisms11112726}, pmid = {38004738}, issn = {2076-2607}, support = {32001115//the Natural Science Foundation of China/ ; 2022JB087//the Initial Funding for Doctoral Research of Huizhou University/ ; 2022A1515111059//the grant from the Basic and Applied Basic Research Foundation of Guangdong Province/ ; 2023A04J1432//the grant from the Guangzhou Science and Technology Plan Project/ ; }, abstract = {Two strains, designated NL03-T5[T] and NL03-T5-1, were isolated from a soil sample collected from the Nanling National Forests, Guangdong Province, PR China. The two strains were Gram-stain-positive, aerobic, rod-shaped and had lophotrichous flagellation. Strain NL03-T5[T] could secrete extracellular mucus whereas NL03-T5-1 could not. Phylogenetic analysis based on 16S rRNA gene sequences revealed that the two strains belong to the genus Cohnella, were most closely related to Cohnella lupini LMG 27416[T] (95.9% and 96.1% similarities), and both showed 94.0% similarity with Cohnella arctica NRRL B-59459[T], respectively. The two strains showed 99.8% 16S rRNA gene sequence similarity between them. The draft genome size of strain NL03-T5[T] was 7.44 Mbp with a DNA G+C content of 49.2 mol%. The average nucleotide identities (ANI) and the digital DNA-DNA hybridization (dDDH) values between NL03-T5[T] and NL03-T5-1 were 99.98% and 100%, indicating the two strains were of the same species. Additionally, the ANI and dDDH values between NL03-T5[T] and C. lupini LMG 27416[T] were 76.1% and 20.4%, respectively. The major cellular fatty acids of strain NL03-T5[T] included anteiso-C15:0 and iso-C16:0. The major polar lipids and predominant respiratory quinone were diphosphatidylglycerol (DPG) and menaquinone-7 (MK-7). 
Based on phylogenetic analysis, phenotypic and chemotaxonomic characterization, genomic DNA G+C content, and ANI and dDDH values, strains NL03-T5[T] and NL03-T5-1 represent novel species in the genus Cohnella, for which the name Cohnella silvisoli is proposed. The type strain is NL03-T5[T] (=GDMCC 1.2294[T] = JCM 34999[T]). Furthermore, comparative genomics revealed that the genus Cohnella had an open pan-genome. The pan-genome of 29 Cohnella strains contained 41,356 gene families, and the number of strain-specific genes ranged from 6 to 1649. The results may explain the good adaptability of the Cohnella strains to different habitats at the genetic level.}, } @article {pmid38003271, year = {2023}, author = {Singh, G and Singh, N and Ellur, RK and Balamurugan, A and Prakash, G and Rathour, R and Mondal, KK and Bhowmick, PK and Gopala Krishnan, S and Nagarajan, M and Seth, R and Vinod, KK and Singh, V and Bollinedi, H and Singh, AK}, title = {Genetic Enhancement for Biotic Stress Resistance in Basmati Rice through Marker-Assisted Backcross Breeding.}, journal = {International journal of molecular sciences}, volume = {24}, number = {22}, pages = {}, doi = {10.3390/ijms242216081}, pmid = {38003271}, issn = {1422-0067}, support = {BT/PR13578/AG/106/991/2015 dated 05/01/2016//Department of Biotechnology/ ; }, abstract = {Pusa Basmati 1509 (PB1509) is one of the major foreign-exchange-earning varieties of Basmati rice; it is semi-dwarf and early maturing with exceptional cooking quality and strong aroma. However, it is highly susceptible to various biotic stresses including bacterial blight and blast. Therefore, bacterial blight resistance genes, namely, xa13 + Xa21 and Xa38, and fungal blast resistance genes Pi9 + Pib and Pita were incorporated into the genetic background of recurrent parent (RP) PB1509 using donor parents, namely, Pusa Basmati 1718 (PB1718), Pusa 1927 (P1927), Pusa 1929 (P1929) and Tetep, respectively. 
Foreground selection was carried out with respective gene-linked markers, stringent phenotypic selection for recurrent parent phenotype, early generation background selection with Simple sequence repeat (SSR) markers, and background analysis at advanced generations with Rice Pan Genome Array comprising 80K SNPs. This has led to the development of Near isogenic lines (NILs), namely, Pusa 3037, Pusa 3054, Pusa 3060 and Pusa 3066 carrying genes xa13 + Xa21, Xa38, Pi9 + Pib and Pita with genomic similarity of 98.25%, 98.92%, 97.38% and 97.69%, respectively, as compared to the RP. Based on GGE-biplot analysis, Pusa 3037-1-44-3-164-20-249-2 carrying xa13 + Xa21, Pusa 3054-2-47-7-166-24-261-3 carrying Xa38, Pusa 3060-3-55-17-157-4-124-1 carrying Pi9 + Pib, and Pusa 3066-4-56-20-159-8-174-1 carrying Pita were identified to be relatively stable and better-performing individuals in the tested environments. Intercrossing between the best BC3F1s has led to the generation of Pusa 3122 (xa13 + Xa21 + Xa38), Pusa 3124 (Xa38 + Pi9 + Pib) and Pusa 3123 (Pi9 + Pib + Pita) with agronomy, grain and cooking quality parameters at par with PB1509. Cultivation of such improved varieties will help farmers reduce the cost of cultivation with decreased pesticide use and improve productivity with ensured safety to consumers.}, } @article {pmid38003233, year = {2023}, author = {Zhegalova, IV and Vasiluev, PA and Flyamer, IM and Shtompel, AS and Glazyrina, E and Shilova, N and Minzhenkova, M and Markova, Z and Petrova, NV and Dashinimaev, EB and Razin, SV and Ulianov, SV}, title = {Trisomies Reorganize Human 3D Genome.}, journal = {International journal of molecular sciences}, volume = {24}, number = {22}, pages = {}, doi = {10.3390/ijms242216044}, pmid = {38003233}, issn = {1422-0067}, support = {075-15-2021-1062//Russian Ministry of Science and Higher Education/ ; }, abstract = {Trisomy is the presence of one extra copy of an entire chromosome or its part in a cell nucleus. 
In humans, autosomal trisomies are associated with severe developmental abnormalities leading to embryonic lethality, miscarriage or pronounced deviations of various organs and systems at birth. Trisomies are characterized by alterations in gene expression level, not exclusively on the trisomic chromosome, but throughout the genome. Here, we applied the high-throughput chromosome conformation capture technique (Hi-C) to study chromatin 3D structure in human chorion cells carrying either additional chromosome 13 (Patau syndrome) or chromosome 16 and in cultured fibroblasts with extra chromosome 18 (Edwards syndrome). The presence of extra chromosomes results in systematic changes of contact frequencies between small and large chromosomes. Analyzing the behavior of individual chromosomes, we found that a limited number of chromosomes change their contact patterns stochastically in trisomic cells and that it could be associated with lamina-associated domains (LAD) and gene content. For trisomy 13 and 18, but not for trisomy 16, the proportion of compacted loci on a chromosome is correlated with LAD content. We also found that regions of the genome that become more compact in trisomic cells are enriched in housekeeping genes, indicating a possible decrease in chromatin accessibility and transcription level of these genes. These results provide a framework for understanding the mechanisms of pan-genome transcription dysregulation in trisomies in the context of chromatin spatial organization.}, } @article {pmid38002453, year = {2023}, author = {Qian, M and Han, X and Liu, J and Xu, P and Tao, F}, title = {Genomic Insights on the Carbon-Negative Workhorse: Systematical Comparative Genomic Analysis on 56 Synechococcus Strains.}, journal = {Bioengineering (Basel, Switzerland)}, volume = {10}, number = {11}, pages = {}, doi = {10.3390/bioengineering10111329}, pmid = {38002453}, issn = {2306-5354}, support = {No. 
2018YFA0903600//National Key Research and Development Program of China/ ; }, abstract = {Synechococcus, a type of ancient photosynthetic cyanobacteria, is crucial in modern carbon-negative synthetic biology due to its potential for producing bioenergy and high-value products. With its high biomass, fast growth rate, and established genetic manipulation tools, Synechococcus has become a research focus in recent years. Abundant germplasm resources have been accumulated from various habitats, including temperature and salinity conditions relevant to industrialization. In this study, a comprehensive analysis of complete genomes of the 56 Synechococcus strains currently available in public databases was performed, clarifying genetic relationships, the adaptability of Synechococcus to the environment, and its reflection at the genomic level. This was carried out via pan-genome analysis and a detailed comparison of the functional gene groups. The results revealed an open-genome pattern, with 275 core genes and variable genome sizes within these strains. The KEGG annotation and orthology composition comparisons unveiled that the cold and thermophile strains have 32 and 84 unique KO functional units in their shared core gene functional units, respectively. Each KO functional unit reflects unique gene families and pathways. In terms of salt tolerance and comparative genomics, there are 65 unique KO functional units in freshwater-adapted strains and 154 in strictly marine strains. 
By delving into these aspects, our understanding of the metabolic potential of Synechococcus was deepened, promoting the development and industrial application of cyanobacterial biotechnology.}, } @article {pmid38001096, year = {2023}, author = {Hyun, JC and Monk, JM and Szubin, R and Hefner, Y and Palsson, BO}, title = {Global pathogenomic analysis identifies known and candidate genetic antimicrobial resistance determinants in twelve species.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {7690}, pmid = {38001096}, issn = {2041-1723}, support = {U0AI124316//U.S. Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; U0AI124316//U.S. Department of Health & Human Services | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; T32GM8806//U.S. Department of Health & Human Services | National Institutes of Health (NIH)/ ; }, abstract = {Surveillance programs for managing antimicrobial resistance (AMR) have yielded thousands of genomes suited for data-driven mechanism discovery. We present a workflow integrating pangenomics, gene annotation, and machine learning to identify AMR genes at scale. When applied to 12 species, 27,155 genomes, and 69 drugs, we 1) find AMR gene transfer mostly confined within related species, with 925 genes in multiple species but just eight in multiple phylogenetic classes, 2) demonstrate that discovery-oriented support vector machines outperform contemporary methods at recovering known AMR genes, recovering 263 genes compared to 145 by Pyseer, and 3) identify 142 AMR gene candidates. Validation of two candidates in E. coli BW25113 reveals cases of conditional resistance: ΔcycA confers ciprofloxacin resistance in minimal media with D-serine, and frdD V111D confers ampicillin resistance in the presence of ampC by modifying the overlapping promoter. 
We expect this approach to be adaptable to other species and phenotypes.}, } @article {pmid38000216, year = {2023}, author = {Gmeiner, A and Njage, PMK and Hansen, LT and Aarestrup, FM and Leekitcharoenphon, P}, title = {Predicting Listeria monocytogenes virulence potential using whole genome sequencing and machine learning.}, journal = {International journal of food microbiology}, volume = {410}, number = {}, pages = {110491}, doi = {10.1016/j.ijfoodmicro.2023.110491}, pmid = {38000216}, issn = {1879-3460}, abstract = {Contamination with food-borne pathogens, such as Listeria monocytogenes, remains a big concern for food safety. Hence, rigorous and continuous microbial surveillance is a standard procedure. At this point, however, the food industry and authorities only focus on detection of Listeria monocytogenes without characterization of individual strains into groups of more or less concern. As whole genome sequencing (WGS) gains increasing interest in the industry, this methodology presents an opportunity to obtain finer resolution of microbial traits such as virulence. Within this study, we therefore aimed to explore the use of WGS in combination with Machine Learning (ML) to predict L. monocytogenes virulence potential on a sub-species level. The WGS datasets used in this study for ML model training consisted of i) national surveillance isolates (n = 169, covering 38 MLST types) and ii) publicly available isolates acquired through the GenomeTrakr network (n = 2880, spanning 80 MLST types). We used the clinical frequency, i.e., ratio of the number of clinical isolates to total amount of isolates, as estimate for virulence potential. 
The predictive performance of input features from three different genomic levels (i.e., virulence genes, pan-genome genes, and single nucleotide polymorphisms (SNPs)) and six machine learning algorithms (i.e., Support Vector Machine with a linear kernel, Support Vector Machine with a radial kernel, Random Forest, Neural Networks, LogitBoost, and Majority Voting) were compared. Our machine learning models predicted sub-species virulence potential with nested cross-validation F1-scores up to 0.88 for the majority voting classifier trained on national surveillance data and using pan-genome genes as input features. The validation of the pre-trained ML models based on 101 previously in vivo studied isolates resulted in F1-scores up to 0.76. Furthermore, we found that the more rapid and less computationally intensive raw read alignment yields comparably accurate models as de novo assembly. The results of our study suggest that a majority voting classifier trained on pan-genome genes is the best and most robust choice for the prediction of clinical frequency. Our study contributes to more rapid and precise characterization of L. monocytogenes virulence and its variation on a sub-species level. We further demonstrated a possible application of WGS data in the context of microbial hazard characterization for food safety. In the future, predictive models may assist case-specific microbial risk management in the food industry. 
The python code, pre-trained models, and prediction pipeline are deposited at (https://github.com/agmei/LmonoVirulenceML).}, } @article {pmid38001525, year = {2023}, author = {Gao, G and Zhang, H and Ni, J and Zhao, X and Zhang, K and Wang, J and Kong, X and Wang, Q}, title = {Insights into genetic diversity and phenotypic variations in domestic geese through comprehensive population and pan-genome analysis.}, journal = {Journal of animal science and biotechnology}, volume = {14}, number = {1}, pages = {150}, pmid = {38001525}, issn = {1674-9782}, support = {cstc2022jxjl80007//Chongqing Scientific Research Institution Performance Incentive Project/ ; CARS-42-51//Earmarked Fund for China Agriculture Research System/ ; 22534C-22//Key R&D Project in Agriculture and Animal Husbandry of Rongchang/ ; CSTB2022NSCQ-MSX0434//Natural Science Foundation of Chongqing Project/ ; 2022NSFSC0605//Natural Science Foundation of Sichuan Province/ ; 2021YFS0379//Natural Science Foundation of Sichuan Province/ ; cstc2021ycjh-bgzxm0248//Chongqing Technology Innovation and Application Development Project/ ; }, abstract = {BACKGROUND: Domestic goose breeds are descended from either the Swan goose (Anser cygnoides) or the Greylag goose (Anser anser), exhibiting variations in body size, reproductive performance, egg production, feather color, and other phenotypic traits. Constructing a pan-genome facilitates a thorough identification of genetic variations, thereby deepening our comprehension of the molecular mechanisms underlying genetic diversity and phenotypic variability.

RESULTS: To comprehensively facilitate population genomic and pan-genomic analyses in geese, we embarked on the task of 659 geese whole genome resequencing data and compiling a database of 155 RNA-seq samples. By constructing the pan-genome for geese, we generated non-reference contigs totaling 612 Mb, unveiling a collection of 2,813 novel genes and pinpointing 15,567 core genes, 1,324 softcore genes, 2,734 shell genes, and 878 cloud genes in goose genomes. Furthermore, we detected an 81.97 Mb genomic region showing signs of genome selection, encompassing the TGFBR2 gene correlated with variations in body weight among geese. Genome-wide association studies utilizing single nucleotide polymorphisms (SNPs) and presence-absence variation revealed significant genomic associations with various goose meat quality, reproductive, and body composition traits. For instance, a gene encoding the SVEP1 protein was linked to carcass oblique length, and a distinct gene-CDS haplotype of the SVEP1 gene exhibited an association with carcass oblique length. Notably, the pan-genome analysis revealed enrichment of variable genes in the "hair follicle maturation" Gene Ontology term, potentially linked to the selection of feather-related traits in geese. A gene presence-absence variation analysis suggested a reduced frequency of genes associated with "regulation of heart contraction" in domesticated geese compared to their wild counterparts. Our study provided novel insights into gene expression features and functions by integrating gene expression patterns across multiple organs and tissues in geese and analyzing population variation.

CONCLUSION: This accomplishment originates from the discernment of a multitude of selection signals and candidate genes associated with a wide array of traits, thereby markedly enhancing our understanding of the processes underlying domestication and breeding in geese. Moreover, assembling the pan-genome for geese has yielded a comprehensive apprehension of the goose genome, establishing it as an indispensable asset poised to offer innovative viewpoints and make substantial contributions to future geese breeding initiatives.}, } @article {pmid37996991, year = {2023}, author = {Li, Y and Yao, J and Sang, H and Wang, Q and Su, L and Zhao, X and Xia, Z and Wang, F and Wang, K and Lou, D and Wang, G and Waterhouse, RM and Wang, H and Luo, S and Sun, C}, title = {Pan-genome analysis highlights the role of structural variation in the evolution and environmental adaptation of Asian honeybees.}, journal = {Molecular ecology resources}, volume = {}, number = {}, pages = {}, doi = {10.1111/1755-0998.13905}, pmid = {37996991}, issn = {1755-0998}, support = {31971397//National Natural Science Foundation of China/ ; 32270445//National Natural Science Foundation of China/ ; PP00P3_202669//Swiss National Science Foundation (SNSF)/ ; Y2019XK13//Central Public-interest Scientific Institution Basal Research Fund for Chinese Academy of Agricultural Sciences/ ; Y2021XK16//Central Public-interest Scientific Institution Basal Research Fund for Chinese Academy of Agricultural Sciences/ ; }, abstract = {The Asian honeybee, Apis cerana, is an ecologically and economically important pollinator. Mapping its genetic variation is key to understanding population-level health, histories and potential capacities to respond to environmental changes. However, most efforts to date were focused on single nucleotide polymorphisms (SNPs) based on a single reference genome, thereby ignoring larger scale genomic variation. 
We employed long-read sequencing technologies to generate a chromosome-scale reference genome for the ancestral group of A. cerana. Integrating this with 525 resequencing data sets, we constructed the first pan-genome of A. cerana, encompassing almost the entire gene content. We found that 31.32% of genes in the pan-genome were variably present across populations, providing a broad gene pool for environmental adaptation. We identified and characterized structural variations (SVs) and found that they were not closely linked with SNP distributions; however, the formation of SVs was closely associated with transposable elements. Furthermore, phylogenetic analysis using SVs revealed a novel A. cerana ecological group not recoverable from the SNP data. Performing environmental association analysis identified a total of 44 SVs likely to be associated with environmental adaptation. Verification and analysis of one of these, a 330 bp deletion in the Atpalpha gene, indicated that this SV may promote the cold adaptation of A. cerana by altering gene expression. Taken together, our study demonstrates the feasibility and utility of applying pan-genome approaches to map and explore genetic feature variations of honeybee populations, and in particular to examine the role of SVs in the evolution and environmental adaptation of A. cerana.}, } @article {pmid37996397, year = {2023}, author = {Vos, M and Padfield, D and Quince, C and Vos, R}, title = {Adaptive radiations in natural populations of prokaryotes: innovation is key.}, journal = {FEMS microbiology ecology}, volume = {}, number = {}, pages = {}, doi = {10.1093/femsec/fiad154}, pmid = {37996397}, issn = {1574-6941}, abstract = {Prokaryote diversity makes up most of the tree of life and is crucial to the functioning of the biosphere and human health. However, the patterns and mechanisms of prokaryote diversification have received relatively little attention compared to animals and plants. 
Adaptive radiation, the rapid diversification of an ancestor species into multiple ecologically divergent species, is a fundamental process by which macrobiological diversity is generated. Here, we discuss whether ecological opportunity could lead to similar bursts of diversification in bacteria. We explore how adaptive radiations in prokaryotes can be kickstarted by horizontally acquired key innovations allowing lineages to invade new niche space that subsequently is partitioned among diversifying specialist descendants. We discuss how novel adaptive zones are colonised and exploited after the evolution of a key innovation and whether certain types of are more prone to adaptive radiation. Radiation into niche specialists does not necessarily lead to speciation in bacteria when barriers to recombination are absent. We propose that in this scenario, niche-specific genes could accumulate within a single lineage, leading to the evolution of an open pan-genome.}, } @article {pmid37995844, year = {2023}, author = {Bonnici, V and Mengoni, C and Mangoni, M and Franco, G and Giugno, R}, title = {PanDelos-frags: A methodology for discovering pangenomic content of incomplete microbial assemblies.}, journal = {Journal of biomedical informatics}, volume = {}, number = {}, pages = {104552}, doi = {10.1016/j.jbi.2023.104552}, pmid = {37995844}, issn = {1532-0480}, abstract = {Pangenomics was originally defined as the problem of comparing the composition of genes into gene families within a set of bacterial isolates belonging to the same species. The problem requires the calculation of sequence homology among such genes. When combined with metagenomics, namely for human microbiome composition analysis, gene-oriented pangenome detection becomes a promising method to decipher ecosystem functions and population-level evolution. Established computational tools are able to investigate the genetic content of isolates for which a complete genomic sequence is available. 
However, there is a plethora of incomplete genomes that are available on public resources, which only a few tools may analyze. Incomplete means that the process for reconstructing their genomic sequence is not complete, and only fragments of their sequence are currently available. However, the information contained in these fragments may play an essential role in the analyses. Here, we present PanDelos-frags, a computational tool which exploits and extends previous results in analysing complete genomes. It provides a new methodology for inferring missing genetic information and thus for managing incomplete genomes. PanDelos-frags outperforms state-of-the-art approaches in reconstructing gene families in synthetic benchmarks and in a real use case of metagenomics. PanDelos-frags is publicly available at https://github.com/InfOmics/PanDelos-frags.}, } @article {pmid37993882, year = {2023}, author = {Rice, ES and Alberdi, A and Alfieri, J and Athrey, G and Balacco, JR and Bardou, P and Blackmon, H and Charles, M and Cheng, HH and Fedrigo, O and Fiddaman, SR and Formenti, G and Frantz, LAF and Gilbert, MTP and Hearn, CJ and Jarvis, ED and Klopp, C and Marcos, S and Mason, AS and Velez-Irizarry, D and Xu, L and Warren, WC}, title = {A pangenome graph reference of 30 chicken genomes allows genotyping of large and complex structural variants.}, journal = {BMC biology}, volume = {21}, number = {1}, pages = {267}, pmid = {37993882}, issn = {1741-7007}, support = {2020-67015-31574//National Institute of Food and Agriculture/ ; 2022-67015-36218//National Institute of Food and Agriculture/ ; 817729//HORIZON EUROPE Research Infrastructures/ ; }, abstract = {BACKGROUND: The red junglefowl, the wild outgroup of domestic chickens, has historically served as a reference for genomic studies of domestic chickens. These studies have provided insight into the etiology of traits of commercial importance. 
However, the use of a single reference genome does not capture diversity present among modern breeds, many of which have accumulated molecular changes due to drift and selection. While reference-based resequencing is well-suited to cataloging simple variants such as single-nucleotide changes and short insertions and deletions, it is mostly inadequate to discover more complex structural variation in the genome.

METHODS: We present a pangenome for the domestic chicken consisting of thirty assemblies of chickens from different breeds and research lines.

RESULTS: We demonstrate how this pangenome can be used to catalog structural variants present in modern breeds and untangle complex nested variation. We show that alignment of short reads from 100 diverse wild and domestic chickens to this pangenome reduces reference bias by 38%, which affects downstream genotyping results. This approach also allows for the accurate genotyping of a large and complex pair of structural variants at the K feathering locus using short reads, which would not be possible using a linear reference.

CONCLUSIONS: We expect that this new paradigm of genomic reference will allow better pinpointing of exact mutations responsible for specific phenotypes, which will in turn be necessary for breeding chickens that meet new sustainability criteria and are resilient to quickly evolving pathogen threats.}, } @article {pmid37991492, year = {2023}, author = {Glad, HM and Tralamazza, SM and Croll, D}, title = {The expression landscape and pangenome of long non-coding RNA in the fungal wheat pathogen Zymoseptoria tritici.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001136}, pmid = {37991492}, issn = {2057-5858}, abstract = {Long non-coding RNAs (lncRNAs) are regulatory molecules interacting in a wide array of biological processes. lncRNAs in fungal pathogens can be responsive to stress and play roles in regulating growth and nutrient acquisition. Recent evidence suggests that lncRNAs may also play roles in virulence, such as regulating pathogenicity-associated enzymes and on-host reproductive cycles. Despite the importance of lncRNAs, only a few model fungi have well-documented inventories of lncRNA. In this study, we apply a recent computational pipeline to predict high-confidence lncRNA candidates in Zymoseptoria tritici, an important global pathogen of wheat impacting global food production. We analyse genomic features of lncRNAs and the most likely associated processes through analyses of expression over a host infection cycle. We find that lncRNAs are frequently expressed during early infection, before the switch to necrotrophic growth. They are mostly located in facultative heterochromatic regions, which are known to contain many genes associated with pathogenicity. Furthermore, we find that lncRNAs are frequently co-expressed with genes that may be involved in responding to host defence signals, such as oxidative stress. 
Finally, we assess pangenome features of lncRNAs using four additional reference-quality genomes. We find evidence that the repertoire of expressed lncRNAs varies substantially between individuals, even though lncRNA loci tend to be shared at the genomic level. Overall, this study provides a repertoire and putative functions of lncRNAs in Z. tritici enabling future molecular genetics and functional analyses in an important pathogen.}, } @article {pmid37986038, year = {2023}, author = {Hernández-Soto, LM and Martínez-Abarca, F and Ramírez-Saad, H and López-Pérez, M and Aguirre-Garrido, JF}, title = {Genome analysis of haloalkaline isolates from the soda saline crater lake of Isabel Island; comparative genomics and potential metabolic analysis within the genus Halomonas.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {696}, pmid = {37986038}, issn = {1471-2164}, abstract = {BACKGROUND: Isabel Island is a Mexican volcanic island primarily composed of basaltic stones. It features a maar known as Laguna Fragatas, which is classified as a meromictic thalassohaline lake. The constant deposition of guano in this maar results in increased levels of phosphorus, nitrogen, and carbon. The aim of this study was to utilize high-quality genomes from the genus Halomonas found in specialized databases as a reference for genome mining of moderately halophilic bacteria isolated from Laguna Fragatas. This research involved genomic comparisons employing phylogenetic, pangenomic, and metabolic-inference approaches.

RESULTS: The Halomonas genus exhibited a large open pangenome, but several genes associated with salt metabolism and homeostatic regulation (ectABC and betABC), nitrogen intake through nitrate and nitrite transporters (nasA, and narGI), and phosphorus uptake (pstABCS) were shared among the Halomonas isolates.

CONCLUSIONS: The isolated bacteria demonstrate consistent adaptation to high salt concentrations, and their nitrogen and phosphorus uptake mechanisms are highly optimized. This optimization is expected in an extremophile environment characterized by minimal disturbances or abrupt seasonal variations. The primary significance of this study lies in the dearth of genomic information available for this saline and low-disturbance environment. This makes it important for ecosystem conservation and enabling an exploration of its biotechnological potential. Additionally, the study presents the first two draft genomes of H. janggokensis.}, } @article {pmid37976619, year = {2023}, author = {Cheng, J and Wu, S and Ye, Q and Gu, Q and Zhang, Y and Ye, Q and Lin, R and Liang, X and Liu, Z and Bai, J and Zhang, J and Chen, M and Wu, Q}, title = {A novel multiplex PCR based method for the detection of Listeria monocytogenes clonal complex 8.}, journal = {International journal of food microbiology}, volume = {409}, number = {}, pages = {110475}, doi = {10.1016/j.ijfoodmicro.2023.110475}, pmid = {37976619}, issn = {1879-3460}, abstract = {Listeria monocytogenes is an important foodborne pathogen worldwide, which could cause listeriosis with a 20-30 % fatality rate in immunocompromised individuals. Listeria monocytogenes MLST clonal complex (CC) 8 strain is a common clone in food and clinical cases. The aim of this study was to develop multiplex PCR (mPCR) and high-resolution melting (HRM) qPCR to simultaneously detect L. monocytogenes CC8 and the other L. monocytogenes strains based on pan-genome analysis. A novel multiplex PCR and HRM qPCR targeted for the genes LM5578_1180 (specific for CC8) and LM5578_2262 (for L. monocytogenes) were developed. The specificity of this multiplex PCR and HRM qPCR were verified with other CCs of L. monocytogenes and other species strains. 
The detection limit of this multiplex PCR and HRM qPCR is 2.1 × 10[3] CFU/mL and 2.1 × 10[0] CFU/mL, respectively. This multiplex PCR and HRM qPCR could accurately detect CC8 strains with the interference of different ratios of L. monocytogenes CC9, CC87, CC121, CC155, and L. innocua strains. Subsequently, the detection ability of mPCR and HRM qPCR were also evaluated in spiked samples. The mPCR method could successfully detect 6.2 × 10[3] CFU/mL of CC8 L. monocytogenes after 6 h enrichment while the multiplex HRM qPCR method could successfully detect 6.2 × 10[4] CFU/mL of CC8 L. monocytogenes after 3 h enrichment. The feasibility of these methods were satisfactory in terms of sensitivity, specificity, and efficiency after evaluating 12 mushroom samples and was consistent with that of the National Standard Detection Method (GB4789.30-2016). In conclusion, the developed assays could be applied for rapid screening and detection of L. monocytogenes CC8 strains both in food and food production environments, providing accurate results to adopt monitoring measures to improve microbiological safety.}, } @article {pmid37976215, year = {2023}, author = {Corut, AK and Wallace, JG}, title = {kGWASflow: a modular, flexible, and reproducible Snakemake workflow for k-mers-based GWAS.}, journal = {G3 (Bethesda, Md.)}, volume = {}, number = {}, pages = {}, doi = {10.1093/g3journal/jkad246}, pmid = {37976215}, issn = {2160-1836}, support = {//University of Georgia/ ; grant #1764127//National Science Foundation/ ; }, abstract = {Genome-wide association studies (GWAS) have been widely used to identify genetic variation associated with complex traits. Despite its success and popularity, the traditional GWAS approach comes with a variety of limitations. For this reason, newer methods for GWAS have been developed, including the use of pan-genomes instead of a reference genome and the utilization of markers beyond single-nucleotide polymorphisms, such as structural variations and k-mers. 
The k-mers-based GWAS approach has especially gained attention from researchers in recent years. However, these new methodologies can be complicated and challenging to implement. Here, we present kGWASflow, a modular, user-friendly, and scalable workflow to perform GWAS using k-mers. We adopted an existing kmersGWAS method into an easier and more accessible workflow using management tools like Snakemake and Conda and eliminated the challenges caused by missing dependencies and version conflicts. kGWASflow increases the reproducibility of the kmersGWAS method by automating each step with Snakemake and using containerization tools like Docker. The workflow encompasses supplemental components such as quality control, read-trimming procedures, and generating summary statistics. kGWASflow also offers post-GWAS analysis options to identify the genomic location and context of trait-associated k-mers. kGWASflow can be applied to any organism and requires minimal programming skills. kGWASflow is freely available on GitHub (https://github.com/akcorut/kGWASflow) and Bioconda (https://anaconda.org/bioconda/kgwasflow).}, } @article {pmid37975995, year = {2023}, author = {Khan, K and Jalal, K and Uddin, R}, title = {Pangenome diversification and resistance gene characterization in Salmonella Typhi prioritized RfaJ as a significant therapeutic marker.}, journal = {Journal, genetic engineering \& biotechnology}, volume = {21}, number = {1}, pages = {125}, pmid = {37975995}, issn = {2090-5920}, abstract = {BACKGROUND: Salmonella Typhi stands as the etiological agent responsible for the onset of human typhoid fever. The pressing demand for innovative therapeutic targets against S. Typhi is underscored by the escalating prevalence of this pathogen and the severe nature of its infections. Consequently, this study employs pangenome analysis to scrutinize 119 S. Typhi-resistant strains, aiming to identify the most promising therapeutic targets originating from its core genome.

RESULTS: Subtractive genomics was employed to systematically eliminate non-homologous (n=1147), essential (n=551), drug-like (n=80), and pathogenicity-related (n=18) proteins from the initial pool of 3351 core genome proteins. Consequently, lipopolysaccharide 1,2-glucosyltransferase RfaJ was designated as the optimal pharmacological target due to its potential versatility. Furthermore, a compendium of 9000 FDA-approved compounds was repurposed for evaluation against the RfaJ drug target, with the specific intent of prioritizing novel, high-potency therapeutic candidates for combating S. Typhi. Ultimately, four compounds, namely DB00549 (Zafirlukast), DB15637 (Fluzoparib), DB15688 (Zavegepant), and DB12411 (Bemcentinib), were singled out as potential inhibitors based on the ligand-protein binding affinity (indicated by the lowest anticipated binding energy) and the overall stability of these compounds. Notably, molecular dynamics simulations, conducted over a 50 nanosecond interval, convincingly demonstrated the stability of these compounds in the context of the RfaJ protein.

CONCLUSION: In summary, the present findings hold significant promise as an initial stride in the broader drug discovery endeavor against S. Typhi infections. However, the experimental validation of the identified drug target and drug candidate is further required to increase the effectiveness of the applied methodology.}, } @article {pmid37974222, year = {2023}, author = {Baril, T and Croll, D}, title = {A pangenome-guided manually curated library of transposable elements for Zymoseptoria tritici.}, journal = {BMC research notes}, volume = {16}, number = {1}, pages = {335}, pmid = {37974222}, issn = {1756-0500}, support = {201149//Schweizerischer Nationalfonds zur Förderung der Wissenschaftlichen Forschung,Switzerland/ ; }, abstract = {OBJECTIVES: High-quality species-specific transposable element (TE) libraries are required for studies to elucidate the evolutionary dynamics of TEs and gain an understanding of their impacts on host genomes. Such high-quality TE resources are severely lacking for species in the fungal kingdom. To facilitate future studies on the putative role of TEs in rapid adaptation observed in the fungal wheat pathogen Zymoseptoria tritici, we produced a manually curated TE library. This was generated by detecting TEs in 19 reference genome assemblies representing the global diversity of the species supplemented by multiple sister species genomes. Improvements over previous TE libraries have been made on TE boundary resolution, detection of ORFs, TE domains, terminal inverted repeats, and class-specific motifs.

DATA DESCRIPTION: A TE consensus library for Z. tritici formatted for use with RepeatMasker. This data is relevant to other researchers investigating TE-host evolutionary dynamics in Z. tritici or who are interested in comparative studies of the fungal kingdom. Further, this TE library can be used to improve gene annotation. Finally, this TE library increases the number of manually curated TE datasets, providing resources to further our understanding of TE diversity.}, } @article {pmid37974097, year = {2023}, author = {Ferhaoui, N and Tanaka, R and Sekizuka, T and Kuroda, M and Sebaihia, M}, title = {Whole genome sequencing and pan-genome analysis of Staphylococcus/Mammaliicoccus spp. isolated from diabetic foot ulcers and contralateral healthy skin of Algerian patients.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {342}, pmid = {37974097}, issn = {1471-2180}, abstract = {BACKGROUND: Diabetic foot infections (DFIs) are the most common complications of diabetic foot ulcers (DFUs), and a significant cause of lower extremity amputation. In this study we used whole genome sequencing to characterize the clonal composition, virulence and resistance genetic determinants of 58 Staphylococcus/Mammaliicoccus spp. isolates from contralateral healthy skin and DFU from 44 hospitalized patients.

RESULTS: S. aureus (n = 32) and S. epidermidis (n = 10) isolates were recovered from both DFUs and healthy skin, whereas, S. haemolyticus (n = 8), M. sciuri (n = 1), S. hominis (n = 1) and S. simulans (n = 3) were recovered exclusively from healthy skin. In contrast, S. caprae (n = 2) and S. saprophyticus (n = 1) were recovered only from DFUs. Among S. aureus isolates, MRSA were present with high prevalence (27/32, 84.4%), 18 of which (66.7%) were from DFUs and 9 (33.3%) from healthy skin. In contrast, the coagulase-negative Staphylococcus (CoNS)/Mammaliicoccus isolates (n = 26), in particular S. epidermidis and S. haemolyticus were more prevalent in healthy skin, (10/26, 38.5%) and (8/26, 30.8%), respectively. MLST, spa and SCCmec typing classified the 32 S. aureus isolates into 6 STs, ST672, ST80, ST241, ST1, ST97, ST291 and 4 unknown STs (STNF); 8 spa types, t044, t037, t3841, t1247, t127, t639, t937 and t9432 and 2 SCCmec types, type IV and type III(A). Among CoNS, the S. epidermidis isolates belonged to ST54, ST35 and ST640. S. haemolyticus belonged to ST3, ST25, ST29, ST1 and ST56. The sole M. sciuri isolate was found to carry an SCCmec type III(A). A wide range of virulence genes and antimicrobial resistance genes were found among our isolates, with varying distribution between species or STs. The pan-genome analysis revealed a highly clonal population of Staphylococcus isolates, particularly among S. aureus isolates. Interestingly, the majority of S. aureus isolates including MRSA, recovered from the healthy skin and DFUs of the same patient belonged to the same clone and exhibited similar virulence/resistance genotype.

CONCLUSIONS: Our study provides clinically relevant information on the population profile, virulence and antibiotic resistance of Staphylococcus/Mammaliicoccus spp. in DFIs, which could serve as a basis for further studies on these as well as other groups of pathogens associated with DFIs.}, } @article {pmid37972151, year = {2023}, author = {McLaughlin, M and Fiebig, A and Crosson, S}, title = {XRE transcription factors conserved in Caulobacter and φCbK modulate adhesin development and phage production.}, journal = {PLoS genetics}, volume = {19}, number = {11}, pages = {e1011048}, doi = {10.1371/journal.pgen.1011048}, pmid = {37972151}, issn = {1553-7404}, abstract = {The xenobiotic response element (XRE) family of transcription factors (TFs), which are commonly encoded by bacteria and bacteriophage, regulate diverse features of bacterial cell physiology and impact phage infection dynamics. Through a pangenome analysis of Caulobacter species isolated from soil and aquatic ecosystems, we uncovered an apparent radiation of a paralogous XRE TF gene cluster, several of which have established functions in the regulation of holdfast adhesin development and biofilm formation in C. crescentus. We further discovered related XRE TFs throughout the class Alphaproteobacteria and its phages, including the φCbK Caulophage, suggesting that members of this cluster impact host-phage interactions. Here we show that a closely related group of XRE transcription factors encoded by both C. crescentus and φCbK can physically interact and function to control the transcription of a common gene set, influencing processes including holdfast development and the production of φCbK virions. The φCbK-encoded XRE paralog, tgrL, is highly expressed at the earliest stages of infection and can directly inhibit transcription of host genes including hfiA, a potent holdfast inhibitor, and gafYZ, an activator of prophage-like gene transfer agents (GTAs). XRE proteins encoded from the C. 
crescentus chromosome also directly repress gafYZ transcription, revealing a functionally redundant set of host regulators that may protect against spurious production of GTA particles and inadvertent cell lysis. Deleting the C. crescentus XRE transcription factors reduced φCbK burst size, while overexpressing these host genes or φCbK tgrL rescued this burst defect. We conclude that this XRE TF gene cluster, shared by C. crescentus and φCbK, plays an important role in adhesion regulation under phage-free conditions, and influences host-phage dynamics during infection.}, } @article {pmid37971714, year = {2023}, author = {Sharma, PK and Ahmed, HI and Heuberger, M and Koo, DH and Quiroz-Chavez, J and Adhikari, L and Raupp, J and Cauet, S and Rodde, N and Cravero, C and Callot, C and Yadav, IS and Kathiresan, N and Athiyannan, N and Ramirez-Gonzalez, RH and Uauy, C and Wicker, T and Abrouk, M and Gu, YQ and Poland, J and Krattinger, SG and Lazo, GR and Tiwari, VK}, title = {An online database for einkorn wheat to aid in gene discovery and functional genomics studies.}, journal = {Database : the journal of biological databases and curation}, volume = {2023}, number = {}, pages = {}, doi = {10.1093/database/baad079}, pmid = {37971714}, issn = {1758-0463}, support = {Award #2020-67013-31460//National Institute of Food and Agriculture/ ; Award #2020-67013-31460//National Institute of Food and Agriculture/ ; }, abstract = {Diploid A-genome wheat (einkorn wheat) presents a nutrition-rich option as an ancient grain crop and a resource for the improvement of bread wheat against abiotic and biotic stresses. Realizing the importance of this wheat species, reference-level assemblies of two einkorn wheat accessions were generated (wild and domesticated). This work reports an einkorn genome database that provides an interface to the cereals research community to perform comparative genomics, applied genetics and breeding research. 
It features queries for annotated genes, the use of a recent genome browser release, and the ability to search for sequence alignments using a modern BLAST interface. Other features include a comparison of reference einkorn assemblies with other wheat cultivars through genomic synteny visualization and an alignment visualization tool for BLAST results. Altogether, this resource will help wheat research and breeding. Database URL https://wheat.pw.usda.gov/GG3/pangenome.}, } @article {pmid37968318, year = {2023}, author = {Wang, T and Duan, S and Xu, C and Wang, Y and Zhang, X and Xu, X and Chen, L and Han, Z and Wu, T}, title = {Pan-genome analysis of 13 Malus accessions reveals structural and sequence variations associated with fruit traits.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {7377}, pmid = {37968318}, issn = {2041-1723}, support = {32072543//National Natural Science Foundation of China (National Science Foundation of China)/ ; }, abstract = {Structural variations (SVs) and copy number variations (CNVs) contribute to trait variations in fleshy-fruited species. Here, we assemble 10 genomes of genetically diverse Malus accessions, including the ever-green cultivar 'Granny Smith' and the widely cultivated cultivar 'Red Fuji'. Combining with three previously reported genomes, we assemble the pan-genome of Malus species and identify 20,220 CNVs and 317,393 SVs. We also observe CNVs that are positively correlated with expression levels of the genes they are associated with. Furthermore, we show that the noncoding RNA generated from a 209 bp insertion in the intron of mitogen-activated protein kinase homology encoding gene, MMK2, regulates the gene expression and affects fruit coloration. Moreover, we identify overlapping SVs associated with fruit quality and biotic resistance. 
This pan-genome uncovers possible contributions of CNVs to gene expression and highlights the role of SVs in apple domestication and economically important traits.}, } @article {pmid37966169, year = {2023}, author = {Nagano, DS and Taniguchi, I and Ono, T and Nakamura, K and Gotoh, Y and Hayashi, T}, title = {Systematic analysis of plasmids of the Serratia marcescens complex using 142 closed genomes.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001135}, pmid = {37966169}, issn = {2057-5858}, abstract = {Plasmids play important roles in bacterial genome diversification. In the Serratia marcescens complex (SMC), a notable contribution of plasmids to genome diversification was also suggested by our recent analysis of >600 draft genomes. As accurate analyses of plasmids in draft genomes are difficult, in this study we analysed 142 closed genomes covering the entire complex, 67 of which were obtained in this study, and identified 132 plasmids (1.9-244.4 kb in length) in 77 strains. While the average numbers of plasmids in clinical and non-clinical strains showed no significant difference, strains belonging to clade 2 (one of the two hospital-adapted lineages) contained more plasmids than the others. Pangenome analysis revealed that of the 28 954 genes identified, 12.8 % were plasmid-specific, and 1.4 % were present in plasmids or chromosomes depending on the strain. In the latter group, while transposon-related genes were most prevalent (31.4 % of the function-predicted genes), genes related to antimicrobial resistance and heavy metal resistance accounted for a notable proportion (22.7 %). Mash distance-based clustering separated the 132 plasmids into 23 clusters and 50 singletons. Most clusters/singletons showed notably different GC contents compared to those of host chromosomes, suggesting their recent or relatively recent appearance in the SMC. 
Among the 23 clusters, 17 were found in only clinical or only non-clinical strains, suggesting the possible preference of their distribution on the environmental niches of host strains. Regarding the host strain phylogeny, 16 clusters were distributed in two or more clades, suggesting their interclade transmission. Moreover, for many plasmids, highly homologous plasmids were found in other species, indicating the broadness of their potential host ranges, beyond the genus, family, order, class or even phylum level. Importantly, highly homologous plasmids were most frequently found in Klebsiella pneumoniae and other species in the family Enterobacteriaceae, suggesting that this family, particularly K. pneumoniae, is the main source for plasmid exchanges with the SMC. These results highlight the power of closed genome-based analysis in the investigation of plasmids and provide important insights into the nature of plasmids distributed in the SMC.}, } @article {pmid37965675, year = {2023}, author = {Zhou, X and Kang, X and Chen, J and Song, Y and Jia, C and Teng, L and Tang, Y and Jiang, Z and Peng, X and Tao, X and Xu, Y and Huang, L and Xu, X and Xu, Y and Zhang, T and Yu, S and Gong, J and Wang, S and Liu, Y and Zhu, G and Kehrenberg, C and Weill, FX and Barrow, P and Li, Y and Zhao, G and Yue, M}, title = {Genome degradation promotes Salmonella pathoadaptation by remodeling fimbriae-mediated proinflammatory response.}, journal = {National science review}, volume = {10}, number = {10}, pages = {nwad228}, pmid = {37965675}, issn = {2053-714X}, abstract = {Understanding changes in pathogen behavior (e.g. increased virulence, a shift in transmission channel) is critical for the public health management of emerging infectious diseases. Genome degradation via gene depletion or inactivation is recognized as a pathoadaptive feature of the pathogen evolving with the host. 
However, little is known about the exact role of genome degradation in affecting pathogenic behavior, and the underlying molecular detail has yet to be examined. Using large-scale global avian-restricted Salmonella genomes spanning more than a century, we projected the genetic diversity of Salmonella Pullorum (bvSP) by showing increasingly antimicrobial-resistant ST92 prevalent in Chinese flocks. The phylogenomic analysis identified three lineages in bvSP, with an enhancement of virulence in the two recently emerged lineages (L2/L3), as evidenced in chicken and embryo infection assays. Notably, the ancestor L1 lineage resembles the Salmonella serovars with higher metabolic flexibilities and more robust environmental tolerance, indicating stepwise evolutionary trajectories towards avian-restricted lineages. Pan-genome analysis pinpointed fimbrial degradation from a virulent lineage. The later engineered fim-deletion mutant, and all other five fimbrial systems, revealed behavior switching that restricted horizontal fecal-oral transmission but boosted virulence in chicks. By depleting fimbrial appendages, bvSP established persistent replication with less proinflammation in chick macrophages and adopted vertical transovarial transmission, accompanied by ever-increasing intensification in the poultry industry. 
Together, we uncovered a previously unseen paradigm for remodeling bacterial surface appendages that supplements virulence-enhanced evolution with increased vertical transmission.}, } @article {pmid37965009, year = {2023}, author = {Tahir Ul Qamar, M and Sadaqat, M and Zhu, XT and Li, H and Huang, X and Fatima, K and Almutairi, MM and Chen, LL}, title = {Comparative genomics profiling revealed multi-stress responsive roles of the CC-NBS-LRR genes in three mango cultivars.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1285547}, pmid = {37965009}, issn = {1664-462X}, abstract = {The nucleotide-binding site-leucine-rich repeat (NBS-LRR) gene family is the largest group of disease resistance (R) genes in plants and is active in response to viruses, bacteria, and fungi usually involved in effector-triggered immunity (ETI). Pangenome-wide studies allow researchers to analyze the genetic diversity of multiple species or their members simultaneously, providing a comprehensive understanding of the evolutionary relationships and diversity present among them. The draft pan-genome of three Mangifera indica cultivars (Alphonso, Hong Xiang Ya, and Tommy atkins) was constructed and Presence/absence variants (PAVs) were filtered through the ppsPCP pipeline. As a result, 2823 genes and 5907 PAVs from H. Xiang Ya, and 1266 genes and 2098 PAVs from T. atkins were added to the reference genome. For the identification of CC-NBS-LRR (CNL) genes in these mango cultivars, this draft pan-genome study has successfully identified 47, 27, and 36 members in Alphonso, H. Xiang Ya, and T. atkins respectively. The phylogenetic analysis divided MiCNL proteins into four distinct subgroups. All MiCNL genes are unevenly distributed on chromosomes. Both tandem and segmental duplication events played a significant role in the expansion of the CNL gene family. These genes contain cis-elements related to light, stress, hormone, and development. 
The analysis of protein-protein interactions (PPI) revealed that MiCNL proteins interacted with other defense-responsive proteins. Gene Ontology (GO) analysis indicated that MiCNL genes play a role in defense mechanisms within the organism. The expression level of the identified genes in fruit peel was observed under disease and cold stress which showed that Mi_A_CNL13 and 14 were up-regulated while Mi_A_CNL15, 25, 30, 31, and 40 were down-regulated in disease stress. On the other hand, Mi_A_CNL2, 14, 41, and 45 were up-regulated and Mi_A_CNL47 is down-regulated in cold stress. Subsequently, the Random Forest (RF) classifier was used to assess the multi-stress response of MiCNLs. It was found that Mi_A_CNL14 is a gene that responds to multiple stress conditions. The CNLs have similar protein structures which show that they are involved in the same function. The above findings provide a foundation for a deeper understanding of the functional characteristics of the mango CNL gene family.}, } @article {pmid37961986, year = {2023}, author = {Hu, H and Scheben, A and Wang, J and Li, F and Li, C and Edwards, D and Zhao, J}, title = {Unravelling inversions: Technological advances, challenges, and potential impact on crop breeding.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14224}, pmid = {37961986}, issn = {1467-7652}, support = {2022-NPY-00-005//Seed industry revitalization project of the special fund for the rural revitalization strategy of Guangdong Province in 2022/ ; 2022-NPY-00-004//Seed industry revitalization project of the special fund for the rural revitalization strategy of Guangdong Province in 2022/ ; 2023YG01//the "YouGu" Plan of Rice Research Institute of Guangdong Academy of Agricultural Sciences/ ; 2023YG04//the "YouGu" Plan of Rice Research Institute of Guangdong Academy of Agricultural Sciences/ ; 2023KJ106//the Innovation Team Project of Guangdong Modern Agricultural Industrial System/ ; 
//Guangdong Key Laboratory of New Technology in Rice Breeding: 2023B1212060042/ ; R2023YJ-QC001//Introduction of Young Key Talents of Guangdong Academy of Agricultural Sciences/ ; }, abstract = {Inversions, a type of chromosomal structural variation, significantly influence plant adaptation and gene functions by impacting gene expression and recombination rates. However, compared with other structural variations, their roles in functional biology and crop improvement remain largely unexplored. In this review, we highlight technological and methodological advancements that have allowed a comprehensive understanding of inversion variants through the pangenome framework and machine learning algorithms. Genome editing is an efficient method for inducing or reversing inversion mutations in plants, providing an effective mechanism to modify local recombination rates. Given the potential of inversions in crop breeding, we anticipate increasing attention on inversions from the scientific community in future research and breeding applications.}, } @article {pmid37961660, year = {2023}, author = {Zakeri, M and Brown, NK and Ahmed, OY and Gagie, T and Langmead, B}, title = {Movi: a fast and cache-efficient full-text pangenome index.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.11.04.565615}, pmid = {37961660}, abstract = {Efficient pangenome indexes are promising tools for many applications, including rapid classification of nanopore sequencing reads. Recently, a compressed-index data structure called the "move structure" was proposed as an alternative to other BWT-based indexes like the FM index and r-index. The move structure uniquely achieves both O(r) space and O(1)-time queries, where r is the number of runs in the pangenome BWT. We implemented Movi, an efficient tool for building and querying move-structure pangenome indexes. 
While the size of the Movi's index is larger than the r-index, it scales at a smaller rate for pangenome references, as its size is exactly proportional to r, the number of runs in the BWT of the reference. Movi can compute sophisticated matching queries needed for classification - such as pseudo-matching lengths - at least ten times faster than the fastest available methods. Movi achieves this speed by leveraging the move structure's strong locality of reference, incurring close to the minimum possible number of cache misses for queries against large pangenomes. Movi's fast constant-time query loop makes it well suited to real-time applications like adaptive sampling for nanopore sequencing, where decisions must be made in a small and predictable time interval.}, } @article {pmid37961321, year = {2023}, author = {Krieger, M and AbdelRahman, YM and Choi, D and Palmer, EA and Yoo, A and McGuire, S and Kreth, J and Merritt, J}, title = {The prevalence of Fusobacterium nucleatum subspecies in the oral cavity stratifies by local health status.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.10.25.563997}, pmid = {37961321}, abstract = {The ubiquitous inflammophilic pathobiont Fusobacterium nucleatum is widely recognized for its strong association with a variety of human dysbiotic diseases such as periodontitis and oral/extraoral abscesses, as well as multiple types of cancer. F. nucleatum is currently subdivided into four subspecies: F. nucleatum subspecies nucleatum (Fn. nucleatum), animalis (Fn. animalis), polymorphum (Fn. polymorphum), and vincentii/fusiforme (Fn. vincentii). Although these subspecies have been historically considered as functionally interchangeable in the oral cavity, direct clinical evidence is largely lacking for this assertion. Consequently, we assembled a collection of oral clinical specimens to determine whether F. 
nucleatum subspecies prevalence in the oral cavity stratifies by local oral health status. Patient-matched clinical specimens of both disease-free dental plaque and odontogenic abscess were analyzed with newly developed culture-dependent and culture-independent approaches using 44 and 60 oral biofilm/tooth abscess paired specimens, respectively. Most oral cavities were found to simultaneously harbor multiple F. nucleatum subspecies, with a greater diversity present within dental plaque compared to abscesses. In dental plaque, Fn. polymorphum is clearly the dominant organism, but this changes dramatically within odontogenic abscesses where Fn. animalis is heavily favored over all other fusobacteria. Surprisingly, the most commonly studied F. nucleatum subspecies, Fn. nucleatum, is only a minor constituent in the oral cavity. To gain further insights into the genetic basis for these phenotypes, we subsequently performed pangenome, phylogenetic, and functional enrichment analyses of oral fusobacterial genomes using the Anvi'o platform, which revealed significant genotypic distinctions among F. nucleatum subspecies. Accordingly, our results strongly support a taxonomic reassignment of each F. nucleatum subspecies into distinct Fusobacterium species. Of these, Fn. 
animalis should be considered as the most clinically relevant at sites of active inflammation, despite being among the least characterized oral fusobacteria.}, } @article {pmid37960081, year = {2023}, author = {Pushkova, EN and Borkhert, EV and Novakovskiy, RO and Dvorianinova, EM and Rozhmina, TA and Zhuchenko, AA and Zhernova, DA and Turba, AA and Yablokov, AG and Sigova, EA and Krasnov, GS and Bolsheva, NL and Melnikova, NV and Dmitriev, AA}, title = {Selection of Flax Genotypes for Pan-Genomic Studies by Sequencing Tagmentation-Based Transcriptome Libraries.}, journal = {Plants (Basel, Switzerland)}, volume = {12}, number = {21}, pages = {}, doi = {10.3390/plants12213725}, pmid = {37960081}, issn = {2223-7747}, support = {075-15-2021-1064//Ministry of Science and Higher Education of the Russian Federation/ ; }, abstract = {Flax (Linum usitatissimum L.) products are used in the food, pharmaceutical, textile, polymer, medical, and other industries. The creation of a pan-genome will be an important advance in flax research and breeding. The selection of flax genotypes that sufficiently cover the species diversity is a crucial step for the pan-genomic study. For this purpose, we have adapted a method based on Illumina sequencing of transcriptome libraries prepared using the Tn5 transposase (tagmentase). This approach reduces the cost of sample preparation compared to commercial kits and allows the generation of a large number of cDNA libraries in a short time. RNA-seq data were obtained for 192 flax plants (3-6 individual plants from 44 flax accessions of different morphology and geographical origin). Evaluation of the genetic relationship between flax plants based on the sequencing data revealed incorrect species identification for five accessions. Therefore, these accessions were excluded from the sample set for the pan-genomic study. 
For the remaining samples, typical genotypes were selected to provide the most comprehensive genetic diversity of flax for pan-genome construction. Thus, high-throughput sequencing of tagmentation-based transcriptome libraries showed high efficiency in assessing the genetic relationship of flax samples and allowed us to select genotypes for the flax pan-genomic analysis.}, } @article {pmid37957573, year = {2023}, author = {Dutta, B and Halder, U and Chitikineni, A and Varshney, RK and Bandopadhyay, R}, title = {Delving into the lifestyle of Sundarban Wetland resident, biofilm producing, halotolerant Salinicoccus roseus: a comparative genomics-based intervention.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {681}, pmid = {37957573}, issn = {1471-2164}, abstract = {BACKGROUND: Microbial community played an essential role in ecosystem processes, be it mangrove wetland or other intertidal ecologies. Several enzymatic activities like hydrolases are effective ecological indicators of soil microbial function. So far, little is known on halophilic bacterial contribution and function on a genomic viewpoint of Indian Sundarban Wetland. Considering the above mentioned issues, the aims of this study was to understand the life style, metabolic functionalities and genomic features of the isolated bacterium, Salinicoccus roseus strain RF1H. A comparative genome-based study of S. roseus has not been reported yet. Henceforth, we have considered the inclusion of the intra-species genome comparison of S. roseus to gain insight into the high degree of variation in the genome of strain RF1H among others.

RESULTS: Salinicoccus roseus strain RF1H is a pink-red pigmented, Gram-positive and non-motile cocci. The bacterium exhibited high salt tolerance (up to 15% NaCl), antibiotic resistance, biofilm formation and secretion of extracellular hydrolytic enzymes. The circular genome was approximately 2.62978 Mb in size, encoding 574 predicted genes with GC content 49.5%. Presence of genomic elements (prophages, transposable elements, CRISPR-Cas system) represented bacterial virulence and multidrug-resistance. Furthermore, genes associated with salt tolerance, temperature adaptation and DNA repair system were distributed in 17 genomic islands. Genes related to hydrocarbon degradation manifested metabolic capability of the bacterium for potential biotechnological applications. A comparative pangenome analysis revealed two-component response regulator, modified C4-dicarboxylate transport system and osmotic stress regulated ATP-binding proteins. Presence of genes encoding arginine decarboxylase (ADC) enzyme being involved in biofilm formation was reported from the genome. In silico study revealed the protein is thermostable and made up with ~ 415 amino acids, and hydrophilic in nature. Three motifs appeared to be evolutionary conserved in all Salinicoccus sequences.

CONCLUSION: The first report of whole genome analysis of Salinicoccus roseus strain RF1H provided information of metabolic functionalities, biofilm formation, resistance mechanism and adaptation strategies to thrive in climate-change induced vulnerable spot like Sundarban. Comparative genome analysis highlighted the unique genome content that contributed the strain's adaptability. The biomolecules produced during metabolism are important sources of compounds with potential beneficial applications in pharmaceuticals.}, } @article {pmid37956283, year = {2023}, author = {Joglekar, P and Conlan, S and Lee-Lin, SQ and Deming, C and Kashaf, SS and {NISC Comparative Sequencing Program} and Kong, HH and Segre, JA}, title = {Integrated genomic and functional analyses of human skin-associated Staphylococcus reveal extensive inter- and intra-species diversity.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {47}, pages = {e2310585120}, doi = {10.1073/pnas.2310585120}, pmid = {37956283}, issn = {1091-6490}, support = {1ZIAHG000180-16//HHS | NIH | National Human Genome Research Institute (NHGRI)/ ; 1ZIABC010938-02//HHS | NIH | National Institute of Arthritis and Musculoskeletal and Skin Diseases (NIAMS)/ ; }, abstract = {Human skin is stably colonized by a distinct microbiota that functions together with epidermal cells to maintain a protective physical barrier. Staphylococcus, a prominent genus of the skin microbiota, participates in colonization resistance, tissue repair, and host immune regulation in strain-specific manners. To unlock the potential of engineering skin microbial communities, we aim to characterize the diversity of this genus within the context of the skin environment. We reanalyzed an extant 16S rRNA amplicon dataset obtained from distinct body sites of healthy volunteers, providing a detailed biogeographic depiction of staphylococcal species that colonize our skin. S. epidermidis, S. capitis, and S. 
hominis were the most abundant staphylococcal species present in all volunteers and were detected at all body sites. Pan-genome analysis of isolates from these three species revealed that the genus-core was dominated by central metabolism genes. Species-restricted-core genes encoded known host colonization functions. The majority (~68%) of genes were detected only in a fraction of isolate genomes, underscoring the immense strain-specific gene diversity. Conspecific genomes grouped into phylogenetic clades, exhibiting body site preference. Each clade was enriched for distinct gene sets that are potentially involved in site tropism. Finally, we conducted gene expression studies of select isolates showing variable growth phenotypes in skin-like medium. In vitro expression revealed extensive intra- and inter-species gene expression variation, substantially expanding the functional diversification within each species. Our study provides an important resource for future ecological and translational studies to examine the role of shared and strain-specific staphylococcal genes within the skin environment.}, } @article {pmid37953337, year = {2023}, author = {Harrison, PW and Amode, MR and Austine-Orimoloye, O and Azov, AG and Barba, M and Barnes, I and Becker, A and Bennett, R and Berry, A and Bhai, J and Bhurji, SK and Boddu, S and Branco Lins, PR and Brooks, L and Ramaraju, SB and Campbell, LI and Martinez, MC and Charkhchi, M and Chougule, K and Cockburn, A and Davidson, C and De Silva, NH and Dodiya, K and Donaldson, S and El Houdaigui, B and Naboulsi, TE and Fatima, R and Giron, CG and Genez, T and Grigoriadis, D and Ghattaoraya, GS and Martinez, JG and Gurbich, TA and Hardy, M and Hollis, Z and Hourlier, T and Hunt, T and Kay, M and Kaykala, V and Le, T and Lemos, D and Lodha, D and Marques-Coelho, D and Maslen, G and Merino, GA and Mirabueno, LP and Mushtaq, A and Hossain, SN and Ogeh, DN and Sakthivel, MP and Parker, A and Perry, M and Piližota, I and Poppleton, D 
and Prosovetskaia, I and Raj, S and Pérez-Silva, JG and Salam, AIA and Saraf, S and Saraiva-Agostinho, N and Sheppard, D and Sinha, S and Sipos, B and Sitnik, V and Stark, W and Steed, E and Suner, MM and Surapaneni, L and Sutinen, K and Tricomi, FF and Urbina-Gómez, D and Veidenberg, A and Walsh, TA and Ware, D and Wass, E and Willhoft, NL and Allen, J and Alvarez-Jarreta, J and Chakiachvili, M and Flint, B and Giorgetti, S and Haggerty, L and Ilsley, GR and Keatley, J and Loveland, JE and Moore, B and Mudge, JM and Naamati, G and Tate, J and Trevanion, SJ and Winterbottom, A and Frankish, A and Hunt, SE and Cunningham, F and Dyer, S and Finn, RD and Martin, FJ and Yates, AD}, title = {Ensembl 2024.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad1049}, pmid = {37953337}, issn = {1362-4962}, support = {222155/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; 75N93019C00077/AI/NIAID NIH HHS/United States ; 226458/Z/22/Z/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Ensembl (https://www.ensembl.org) is a freely available genomic resource that has produced high-quality annotations, tools, and services for vertebrates and model organisms for more than two decades. In recent years, there has been a dramatic shift in the genomic landscape, with a large increase in the number and phylogenetic breadth of high-quality reference genomes, alongside major advances in the pan-genome representations of higher species. In order to support these efforts and accelerate downstream research, Ensembl continues to focus on scaling for the rapid annotation of new genome assemblies, developing new methods for comparative analysis, and expanding the depth and quality of our genome annotations. 
This year we have continued our expansion to support global biodiversity research, doubling the number of annotated genomes we support on our Rapid Release site to over 1700, driven by our close collaboration with biodiversity projects such as Darwin Tree of Life. We have also strengthened support for key agricultural species, including the first regulatory builds for farmed animals, and have updated key tools and resources that support the global scientific community, notably the Ensembl Variant Effect Predictor. Ensembl data, software, and tools are freely available.}, } @article {pmid37953330, year = {2023}, author = {Raney, BJ and Barber, GP and Benet-Pagès, A and Casper, J and Clawson, H and Cline, MS and Diekhans, M and Fischer, C and Navarro Gonzalez, J and Hickey, G and Hinrichs, AS and Kuhn, RM and Lee, BT and Lee, CM and Le Mercier, P and Miga, KH and Nassar, LR and Nejad, P and Paten, B and Perez, G and Schmelter, D and Speir, ML and Wick, BD and Zweig, AS and Haussler, D and Kent, WJ and Haeussler, M}, title = {The UCSC Genome Browser database: 2024 update.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad987}, pmid = {37953330}, issn = {1362-4962}, support = {2U24HG002371/HG/NHGRI NIH HHS/United States ; 75N93019C00076/AI/NIAID NIH HHS/United States ; RF1MH132662/MH/NIMH NIH HHS/United States ; }, abstract = {The UCSC Genome Browser (https://genome.ucsc.edu) is a web-based genomic visualization and analysis tool that serves data to over 7,000 distinct users per day worldwide. It provides annotation data on thousands of genome assemblies, ranging from human to SARS-CoV2. This year, we have introduced new data from the Human Pangenome Reference Consortium and on viral genomes including SARS-CoV2. We have added 1,200 new genomes to our GenArk genome system, increasing the overall diversity of our genomic representation. 
We have added support for nine new user-contributed track hubs to our public hub system. Additionally, we have released 29 new tracks on the human genome and 11 new tracks on the mouse genome. Collectively, these new features expand both the breadth and depth of the genomic knowledge that we share publicly with users worldwide.}, } @article {pmid37953085, year = {2023}, author = {Li, Y and Wu, Y and Li, D and Du, L and Zhao, L and Wang, R and Chen, X and Jia, X and Ma, R and Wang, T and Li, J and Zhang, G and Wang, X and Hu, M and Chen, X and Wang, X and Kang, W and Sun, H and Xu, Y and Liu, Y}, title = {Multicenter comparative genomic study of Klebsiella oxytoca complex reveals a highly antibiotic-resistant subspecies of Klebsiella michiganensis.}, journal = {Journal of microbiology, immunology, and infection = Wei mian yu gan ran za zhi}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jmii.2023.10.014}, pmid = {37953085}, issn = {1995-9133}, abstract = {BACKGROUND: The Klebsiella oxytoca complex is an opportunistic pathogen that has been recently identified as an actual complex. However, the characteristics of each species remain largely unknown. We aimed to study the clinical prevalence, antimicrobial profiles, genetic differences, and interaction with the host of each species of this complex.

METHODS: One hundred and three clinical isolates of the K. oxytoca complex were collected from 33 hospitals belonging to 19 areas in China from 2020 to 2021. Species were identified using whole genome sequencing based on average nucleotide identity. Clinical infection characteristics of the species were analyzed. Comparative genomics and pan-genome analyses were performed on these isolates and an augmented dataset, including 622 assemblies from the National Center for Biotechnology Information. In vitro assays evaluating the adhesion ability of human respiratory epithelial cells and survivability against macrophages were performed on randomly selected isolates.

RESULTS: Klebsiella michiganensis (46.6%, 48/103) and K. oxytoca (35.92%, 37/103) were the major species of the complex causing human infections. K. michiganensis had a higher genomic diversity and larger pan-genome size than did K. oxytoca. K. michiganensis isolates with blaoxy-5 had a higher resistance rate to various antibiotics, antimicrobial gene carriage rate, adhesion ability to human respiratory epithelial cells, and survival rate against macrophages than isolates of other species.

CONCLUSION: Our study revealed the genetic diversity of K. michiganensis and firstly identified the highly antimicrobial-resistant profile of K. michiganensis carrying blaoxy-5.}, } @article {pmid37951618, year = {2023}, author = {Laux, M and Piroupo, CM and Setubal, JC and Giani, A}, title = {The Raphidiopsis (= Cylindrospermopsis) raciborskii pangenome updated: Two new metagenome-assembled genomes from the South American clade.}, journal = {Harmful algae}, volume = {129}, number = {}, pages = {102518}, doi = {10.1016/j.hal.2023.102518}, pmid = {37951618}, issn = {1878-1470}, abstract = {Two Raphidiopsis (=Cylindrospermopsis) raciborskii metagenome-assembled genomes (MAGs) were recovered from two freshwater metagenomic datasets sampled in 2011 and 2012 in Pampulha Lake, a hypereutrophic, artificial, shallow reservoir, located in the city of Belo Horizonte (MG), Brazil. Since the late 1970s, the lake has undergone increasing eutrophication pressure, due to wastewater input, leading to the occurrence of frequent cyanobacterial blooms. The major difference observed between PAMP2011 and PAMP2012 MAGs was the lack of the saxitoxin gene cluster in PAMP2012, which also presented a smaller genome, while PAMP2011 presented the complete sxt cluster and all essential proteins and clusters. The pangenome analysis was performed with all Raphidiopsis/Cylindrospermopsis genomes available at NCBI to date, with the addition of PAMP2011 and PAMP2012 MAGs (All33 subset), but also without the South American strains (noSA subset), and only among the South American strains (SA10 and SA8 subsets). 
We observed a substantial increase in the core genome size for the 'noSA' subset, in comparison to 'All33' subset, and since the core genome reflects the closeness among the pangenome members, the results strongly suggest that the conservation level of the essential gene repertoire seems to be affected by the geographic origin of the strains being analyzed, supporting the existence of a distinct SA clade. The Raphidiopsis pangenome comprised a total of 7943 orthologous protein clusters, and the two new MAGs increased the pangenome size by 11%. The pangenome based phylogenetic relationships among the 33 analyzed genomes showed that the SA genomes clustered together with 99% bootstrap support, reinforcing the metabolic particularity of the Raphidiopsis South American clade, related to its saxitoxin producing unique ability, while also indicating a different evolutionary history due to its geographic isolation.}, } @article {pmid37944674, year = {2023}, author = {Mahnoor, I and Shabbir, H and Nawaz, S and Aziz, K and Aziz, U and Khalid, K and Irum, S and Andleeb, S}, title = {Characterization of exclusively non-commensal Neisseria gonorrhoeae pangenome to prioritize globally conserved and thermodynamically stable vaccine candidates using immune-molecular dynamic simulations.}, journal = {Microbial pathogenesis}, volume = {}, number = {}, pages = {106439}, doi = {10.1016/j.micpath.2023.106439}, pmid = {37944674}, issn = {1096-1208}, abstract = {Neisseria gonorrhoeae (Ngo) has emerged as a global threat leading to one of the most common sexually transmitted diseases in the world. It has also become one of the leading antimicrobial resistant organisms, resulting in fewer treatment options and an increased morbidity. Therefore, in recent years, there has been an increased focus on the development of new treatments and preventive strategies to combat its infection. 
In this study, we have combined the most conserved epitopes from the completely assembled strains of Ngo to develop a universal and a thermodynamically stable vaccine candidate. For our vaccine design, the epitopes were selected for their high immunogenicity, non-allergenicity and non-cytotoxicity, making them the ideal candidates for vaccine development. For the screening process, several reverse vaccinology tools were employed to rigorously extract non-homologous and immunogenic epitopes from the selected proteins. Consequently, a total number of 3 B-cell epitopes and 6 T-cell epitopes were selected and joined by multiple immune-modulating adjuvants and linkers to generate a promiscuous immune response. Additionally, the stability and flexible nature of the vaccine construct was confirmed using various molecular dynamic simulation tools. Overall, the vaccine candidate showed promising binding affinity to various HLA alleles and TLR receptors; however, further studies are needed to assess its efficacy in-vivo. In this way, we have designed a multi-subunit vaccine candidate to potentially combat and control the spread of N. 
gonorrhoeae.}, } @article {pmid37941143, year = {2023}, author = {Lu, K and Pan, Y and Shen, J and Yang, L and Zhan, C and Liang, S and Tai, S and Wan, L and Li, T and Cheng, T and Ma, B and Pan, G and He, N and Lu, C and Westhof, E and Xiang, Z and Han, MJ and Tong, X and Dai, F}, title = {SilkMeta: a comprehensive platform for sharing and exploiting pan-genomic and multi-omic silkworm data.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad956}, pmid = {37941143}, issn = {1362-4962}, support = {31 830 094//National Natural Science Foundation of China/ ; CARS-18-ZJ0102//China Agriculture Research System of MOF and MARA/ ; cstc2021jcyj-cxtt0005//Natural Science Foundation of Chongqing/ ; 2022CQBSHTB3066//Special Funding for Postdoctoral Research of Chongqing, China/ ; }, abstract = {The silkworm Bombyx mori is a domesticated insect that serves as an animal model for research and agriculture. The silkworm super-pan-genome dataset, which we published last year, is a unique resource for the study of global genomic diversity and phenotype-genotype association. Here we present SilkMeta (http://silkmeta.org.cn), a comprehensive database covering the available silkworm pan-genome and multi-omics data. The database contains 1082 short-read genomes, 546 long-read assembled genomes, 1168 transcriptomes, 294 phenotype characterizations (phenome), tens of millions of variations (variome), 7253 long non-coding RNAs (lncRNAs), 18 717 full length transcripts and a set of population statistics. We have compiled publications on functional genomics research and genetic stock deciphering (mutant map). A range of bioinformatics tools is also provided for data visualization and retrieval. The large batch of omics data and tools were integrated in twelve functional modules that provide useful strategies and data for comparative and functional genomics research. 
The interactive bioinformatics platform SilkMeta will benefit not only the silkworm but also the insect biology communities.}, } @article {pmid37940013, year = {2023}, author = {Krishnan, S and Sasi, S and Kodakkattumannil, P and Al Senaani, S and Lekshmi, G and Kottackal, M and Amiri, KMA}, title = {Cationic and anionic detergent buffers in sequence yield high-quality genomic DNA from diverse plant species.}, journal = {Analytical biochemistry}, volume = {}, number = {}, pages = {115372}, doi = {10.1016/j.ab.2023.115372}, pmid = {37940013}, issn = {1096-0309}, abstract = {Because of the heterogeneity among seedlings of outbreeding species, the use of seedling tissues as a source of DNA is unsuitable for the genomic characterization of elite germplasms. High-quality DNA, free of RNA, proteins, polysaccharides, secondary metabolites, and shearing, is mandatory for downstream molecular biology applications, especially for next-generation genome sequencing and pangenome analysis aiming to capture the complete genetic diversity within a species. The study aimed to accomplish an efficient protocol for the extraction of high-quality DNA suitable for diverse plant species/tissues. We describe a reliable, and consistent protocol suitable for the extraction of DNA from 42 difficult-to-extract plant species belonging to 33 angiosperm (monocot and dicot) families, including tissues such as seeds, roots, endosperm, and flower/fruit tissues. The protocol was first optimized for the outbreeding recalcitrant trees viz., Prosopis cineraria, Conocarpus erectus, and Phoenix dactylifera, which are rich in proteins, polysaccharides, and secondary metabolites, and the quality of the extracted DNA was confirmed by downstream applications. Nine procedures were attempted to extract high-quality, impurities-free DNA from these three plant species. 
Extraction of the ethanol-precipitated DNA from cetyltrimethylammonium bromide (CTAB) protocol using sodium dodecyl sulfate (SDS) buffer, i.e., the extraction using a cationic (CTAB) detergent followed by an anionic (SDS) detergent was the key for high yield and high purity (1.75-1.85 against A260/280 and an A260/230 ratio of >2) DNA. A vice versa extraction procedure, i.e., SDS buffer followed by CTAB buffer, and also CTAB buffer followed by CTAB, did not yield good-quality DNA. PCR (using different primers) and restriction endonuclease digestion of the DNA extracted from these three plants validated the protocol. The accomplishment of the genome of P. cineraria using the DNA extracted using the modified protocol confirmed its applicability to genomic studies. The optimized protocol successful in extracting high-quality DNA from diverse plant species/tissues extends its applicability and is useful for accomplishing genome sequences of elite germplasm of recalcitrant plant species with quality reads.}, } @article {pmid37938300, year = {2022}, author = {Gushgari-Doyle, S and Lui, LM and Nielsen, TN and Wu, X and Malana, RG and Hendrickson, AJ and Carion, H and Poole, FL and Adams, MWW and Arkin, AP and Chakraborty, R}, title = {Genotype to ecotype in niche environments: adaptation of Arthrobacter to carbon availability and environmental conditions.}, journal = {ISME communications}, volume = {2}, number = {1}, pages = {32}, pmid = {37938300}, issn = {2730-6151}, support = {DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological 
and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; DE-AC02-05CH11231//DOE | SC | Biological and Environmental Research (BER)/ ; }, abstract = {Niche environmental conditions influence both the structure and function of microbial communities and the cellular function of individual strains. The terrestrial subsurface is a dynamic and diverse environment that exhibits specific biogeochemical conditions associated with depth, resulting in distinct environmental niches. Here, we present the characterization of seven distinct strains belonging to the genus Arthrobacter isolated from varying depths of a single sediment core and associated groundwater from an adjacent well. We characterized genotype and phenotype of each isolate to connect specific cellular functions and metabolisms to ecotype. Arthrobacter isolates from each ecotype demonstrated functional and genomic capacities specific to their biogeochemical conditions of origin, including laboratory-demonstrated characterization of salinity tolerance and optimal pH, and genes for utilization of carbohydrates and other carbon substrates. 
Analysis of the Arthrobacter pangenome revealed that it is notably open with a volatile accessory genome compared to previous pangenome studies on other genera, suggesting a high potential for adaptability to environmental niches.}, } @article {pmid37935586, year = {2023}, author = {Sen, S and Woodhouse, MR and Portwood, JL and Andorf, CM}, title = {Maize Feature Store: A centralized resource to manage and analyze curated maize multi-omics features for machine learning applications.}, journal = {Database : the journal of biological databases and curation}, volume = {2023}, number = {}, pages = {}, doi = {10.1093/database/baad078}, pmid = {37935586}, issn = {1758-0463}, support = {5030-21000-068-00-D//Department of Agriculture, Agricultural Research Service/ ; 5030-21000-068-00-D//Department of Agriculture, Agricultural Research Service/ ; }, abstract = {The big-data analysis of complex data associated with maize genomes accelerates genetic research and improves agronomic traits. As a result, efforts have increased to integrate diverse datasets and extract meaning from these measurements. Machine learning models are a powerful tool for gaining knowledge from large and complex datasets. However, these models must be trained on high-quality features to succeed. Currently, there are no solutions to host maize multi-omics datasets with end-to-end solutions for evaluating and linking features to target gene annotations. Our work presents the Maize Feature Store (MFS), a versatile application that combines features built on complex data to facilitate exploration, modeling and analysis. Feature stores allow researchers to rapidly deploy machine learning applications by managing and providing access to frequently used features. We populated the MFS for the maize reference genome with over 14 000 gene-based features based on published genomic, transcriptomic, epigenomic, variomic and proteomics datasets. 
Using the MFS, we created an accurate pan-genome classification model with an AUC-ROC score of 0.87. The MFS is publicly available through the maize genetics and genomics database. Database URL https://mfs.maizegdb.org/.}, } @article {pmid37935710, year = {2023}, author = {Radjasa, OK and Steven, R and Humaira, Z and Dwivany, FM and Nugrahapraja, H and Trinugroho, JP and Kristianti, T and Chahyadi, A and Natanael, Y and Priharto, N and {Kamarisima} and Sembiring, FAPB and Dwijayanti, A and Kusmita, L and Moeis, MR and Suhardi, VSH}, title = {Biosynthetic gene cluster profiling from North Java Sea Virgibacillus salarius reveals hidden potential metabolites.}, journal = {Scientific reports}, volume = {13}, number = {1}, pages = {19273}, pmid = {37935710}, issn = {2045-2322}, support = {223/IT1.B07.1/TA.00/2022//Institut Teknologi Bandung Priority Research/ ; }, abstract = {Virgibacillus salarius 19.PP.SC1.6 is a coral symbiont isolated from Indonesia's North Java Sea; it has the ability to produce secondary metabolites that provide survival advantages and biological functions, such as ectoine, which is synthesized by an ectoine gene cluster. Apart from being an osmoprotectant for bacteria, ectoine is also known as a chemical chaperone with numerous biological activities such as maintaining protein stability, which makes ectoine in high demand in the market industry and makes it beneficial to investigate V. salarius ectoine. However, there has been no research on genome-based secondary metabolite and ectoine gene cluster characterization from Indonesian marine V. salarius. In this study, we performed a genomic analysis and ectoine identification of V. salarius. A high-quality draft genome with total size of 4.45 Mb and 4426 coding sequence (CDS) was characterized and then mapped into the Cluster of Orthologous Groups (COG) category. The genus Virgibacillus has an "open" pangenome type with total of 18 genomic islands inside the V. salarius 19.PP.SC1.6 genome. 
There were seven clusters of secondary metabolite-producing genes found, with a total of 80 genes classified as NRPS, PKS (type III), terpenes, and ectoine biosynthetic related genes. The ectoine gene cluster forms one operon consists of ectABC gene with 2190 bp gene cluster length, and is successfully characterized. The presence of ectoine in V. salarius was confirmed using UPLC-MS/MS operated in Multiple Reaction Monitoring (MRM) mode, which indicates that V. salarius has an intact ectoine gene clusters and is capable of producing ectoine as compatible solutes.}, } @article {pmid37934390, year = {2023}, author = {Ullah, A and Rehman, B and Khan, S and Almanaa, TN and Waheed, Y and Hassan, M and Naz, T and Ul Haq, M and Muhammad, R and Sanami, S and Irfan, M and Ahmad, S}, title = {An In Silico Multi-epitopes Vaccine Ensemble and Characterization Against Nosocomial Proteus penneri.}, journal = {Molecular biotechnology}, volume = {}, number = {}, pages = {}, pmid = {37934390}, issn = {1559-0305}, support = {RSPD2023R632//Deanship of Scientific Research, King Saud University/ ; }, abstract = {Proteus penneri (P. penneri) is a bacillus-shaped, gram-negative, facultative anaerobe bacterium that is primarily an invasive pathogen and the etiological agent of several hospital-associated infections. P. penneri strains are naturally resistant to macrolides, amoxicillin, oxacillin, penicillin G, and cephalosporins; in addition, no vaccines are available against these strains. This warrants efforts to propose a theoretical based multi-epitope vaccine construct to prevent pathogen infections. In this research, reverse vaccinology bioinformatics and immunoinformatics approaches were adopted for vaccine target identification and construction of a multi-epitope vaccine. 
In the first phase, a core proteome dataset of the targeted pathogen was obtained using the NCBI database and subjected to bacterial pan-genome analysis using bacterial pan-genome analysis (BPGA) to predict core protein sequences which were then used to find good vaccine target candidates. This identified two proteins, Hcp family type VI secretion system effector and superoxide dismutase family protein, as promising vaccine targets. Afterward using the IEDB database, different B-cell and T-cell epitopes were predicted. A set of four epitopes "KGSVNVQDRE, NTGKLTGTR, IIHSDSWNER, and KDGKPVPALK" were chosen for the development of a multi-epitope vaccine construct. A 183 amino acid long vaccine design was built along with "EAAAK" and "GPGPG" linkers and a cholera toxin B-subunit adjuvant. The designed vaccine model comprised immunodominant, non-toxic, non-allergenic, and physicochemical stable epitopes. The model vaccine was docked with MHC-I, MHC-II, and TLR-4 immune cell receptors using the Cluspro2.0 web server. The binding energy score of the vaccine was -654.7 kcal/mol for MHC-I, -738.4 kcal/mol for MHC-II, and -695.0 kcal/mol for TLR-4. A molecular dynamic simulation was done using AMBER v20 package for dynamic behavior in nanoseconds. Additionally, MM-PBSA binding free energy analysis was done to test intermolecular binding interactions between docked molecules. The MM-GBSA net binding energy score was -148.00 kcal/mol, -118.00 kcal/mol, and -127.00 kcal/mol for vaccine with TLR-4, MHC-I, and MHC-II, respectively. Overall, these in silico-based predictions indicated that the vaccine is highly promising in terms of developing protective immunity against P. penneri. 
However, additional experimental validation is required to unveil the real immune response to the designed vaccine.}, } @article {pmid37934072, year = {2023}, author = {Raghuram, V and Gunoskey, JJ and Hofstetter, KS and Jacko, NF and Shumaker, MJ and Hu, YJ and Read, TD and David, MZ}, title = {Comparison of genomic diversity between single and pooled Staphylococcus aureus colonies isolated from human colonization cultures.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001111}, pmid = {37934072}, issn = {2057-5858}, abstract = {The most common approach to sampling the bacterial populations within an infected or colonized host is to sequence genomes from a single colony obtained from a culture plate. However, it is recognized that this method does not capture the genetic diversity in the population. Sequencing a mixture of several colonies (pool-seq) is a better approach to detect population heterogeneity, but it is more complex to analyse due to different types of heterogeneity, such as within-clone polymorphisms, multi-strain mixtures, multi-species mixtures and contamination. Here, we compared 8 single-colony isolates (singles) and pool-seq on a set of 2286 Staphylococcus aureus culture samples to identify features that can distinguish pure samples, samples undergoing intraclonal variation and mixed strain samples. The samples were obtained by swabbing 3 body sites on 85 human participants quarterly for a year, who initially presented with a methicillin-resistant S. aureus skin and soft-tissue infection (SSTI). We compared parameters such as sequence quality, contamination, allele frequency, nucleotide diversity and pangenome diversity in each pool to those for the corresponding singles. Comparing singles from the same culture plate, we found that 18% of sample collections contained mixtures of multiple multilocus sequence types (MLSTs or STs). 
We showed that pool-seq data alone could predict the presence of multi-ST populations with 95% accuracy. We also showed that pool-seq could be used to estimate the number of intra-clonal polymorphic sites in the population. Additionally, we found that the pool may contain clinically relevant genes such as antimicrobial resistance markers that may be missed when only examining singles. These results highlight the potential advantage of analysing genome sequences of total populations obtained from clinical cultures rather than single colonies.}, } @article {pmid37934071, year = {2023}, author = {Sommer, H and Djamalova, D and Galardini, M}, title = {Reduced ambiguity and improved interpretability of bacterial genome-wide associations using gene-cluster-centric k-mers.}, journal = {Microbial genomics}, volume = {9}, number = {11}, pages = {}, doi = {10.1099/mgen.0.001129}, pmid = {37934071}, issn = {2057-5858}, abstract = {The wide adoption of bacterial genome sequencing and encoding both core and accessory genome variation using k-mers has allowed bacterial genome-wide association studies (GWAS) to identify genetic variants associated with relevant phenotypes such as those linked to infection. Significant limitations still remain because of k-mers being duplicated across gene clusters and as far as the interpretation of association results is concerned, which affects the wider adoption of GWAS methods on microbial data sets. We have developed a simple computational method (panfeed) that explicitly links each k-mer to their gene cluster at base-resolution level, which allows us to avoid biases introduced by a global de Bruijn graph as well as more easily map and annotate associated variants. We tested panfeed on two independent data sets, correctly identifying previously characterized causal variants, which demonstrates the precision of the method, as well as its scalable performance. 
panfeed is a command line tool written in the python programming language and is available at https://github.com/microbial-pangenomes-lab/panfeed.}, } @article {pmid37934016, year = {2023}, author = {Garcia, J and Morales-Cruz, A and Cochetel, N and Minio, A and Figueroa-Balderas, R and Rolshausen, P and Baumgartner, K and Cantu, D}, title = {Comparative pangenomic insights into the distinct evolution of virulence factors among grapevine trunk pathogens.}, journal = {Molecular plant-microbe interactions : MPMI}, volume = {}, number = {}, pages = {}, doi = {10.1094/MPMI-09-23-0129-R}, pmid = {37934016}, issn = {0894-0282}, abstract = {The permanent organs of grapevines (V. vinifera L.), like other woody perennials, are colonized by various unrelated pathogenic ascomycete fungi secreting cell wall-degrading enzymes and phytotoxic secondary metabolites that contribute to host damage and disease symptoms. Trunk pathogens differ in the symptoms they induce and the extent and speed of damage. Isolates of the same species often display a wide virulence range, even within the same vineyard. This study focuses on Eutypa lata, Neofusicoccum parvum, and Phaeoacremonium minimum, causal agents of Eutypa dieback, Botryosphaeria dieback, and Esca, respectively. We sequenced fifty isolates from viticulture regions worldwide and built nucleotide-level, reference-free pangenomes for each species. Through examining genomic diversity and pangenome structure, we analyzed intraspecific conservation and variability of putative virulence factors, focusing on functions under positive selection, and recent gene-family dynamics of contraction and expansion. Our findings reveal contrasting distributions of putative virulence factors in the core, dispensable, and private genomes of each pangenome. For example, CAZymes were prevalent in the core genomes of each pangenome, whereas biosynthetic gene clusters were prevalent in the dispensable genomes of E. lata and P. minimum. 
The dispensable fractions were also enriched in Gypsy transposable elements and virulence factors under positive selection (polyketide synthases genes in E. lata and P. minimum; glycosyltransferases in N. parvum). Our findings underscore the complexity of the genomic architecture in each species and provide insights into their adaptive strategies, enhancing our understanding of the underlying mechanisms of virulence.}, } @article {pmid37931775, year = {2023}, author = {Laufer, V and Glover, TW and Wilson, TE}, title = {Applications of advanced technologies for detecting genomic structural variation.}, journal = {Mutation research. Reviews in mutation research}, volume = {}, number = {}, pages = {108475}, doi = {10.1016/j.mrrev.2023.108475}, pmid = {37931775}, issn = {1388-2139}, abstract = {Chromosomal structural variation (SV) encompasses a heterogenous class of genetic variants that exerts strong influences on human health and disease. Despite their importance, many structural variants (SVs) have remained poorly characterized at even a basic level, a discrepancy predicated upon the technical limitations of prior genomic assays. However, recent advances in genomic technology can identify and localize SVs accurately, opening new questions regarding SV risk factors and their impacts in humans. Here, we first define and classify human SVs and their generative mechanisms, highlighting characteristics leveraged by various SV assays. We next examine the first-ever gapless assembly of the human genome and the technical process of assembling it, which required third-generation sequencing technologies to resolve structurally complex loci. The new portions of that "telomere-to-telomere" and subsequent pangenome assemblies highlight aspects of SV biology likely to develop in the near-term. 
We consider the strengths and limitations of the most promising new SV technologies and when they or longstanding approaches are best suited to meeting salient goals in the study of human SV in population-scale genomics research, clinical, and public health contexts. It is a watershed time in our understanding of human SV when new approaches are expected to fundamentally change genomic applications.}, } @article {pmid37928322, year = {2023}, author = {Magome, TG and Ramatla, T and Mokgokong, P and Thekisoe, O and Lekota, KE}, title = {The draft genome and pan-genome structure of Paraclostridium bifermentans strain T2 isolated from sheep faeces.}, journal = {Data in brief}, volume = {51}, number = {}, pages = {109660}, pmid = {37928322}, issn = {2352-3409}, abstract = {Paraclostridium bifermentans is a Gram-positive, rod-shaped bacterium that can inhabit various mesophilic environments such as soil, marine habitats, and polluted waters. Some species of Paraclostridium are reported to cause fatal infections in humans, although mechanisms and capacity for adaptation are still unknown. We hereby present the whole genome sequence data of P. bifermentans T2 strain isolated from sheep faecal matter in Potchefstroom, South Africa. DNA libraries were sequenced on the Oxford Nanopore Mk1B platform. The generated sequence data was assembled and polished using Flye assembler. Genome data analysis yielded a genome size of 2 911 782 bp, comprising of a 27.8 % G + C content. Rapid Annotation using Subsystem Technology (RAST) showed that the draft genome of this strain consists of 6 514 coding sequences (CDS). The pan-genome was defined by a total of 16 288 CDSs, grouping the strain with the genome of P. bifermentans SampleS7P1. 
The draft genome sequence has been deposited in NCBI GenBank with the accession number of JAUPET000000000.}, } @article {pmid37920964, year = {2023}, author = {Bachari, A and Nassar, N and Schanknecht, E and Telukutla, S and Piva, TJ and Mantri, N}, title = {Rationalizing a prospective coupling effect of cannabinoids with the current pharmacotherapy for melanoma treatment.}, journal = {WIREs mechanisms of disease}, volume = {}, number = {}, pages = {e1633}, doi = {10.1002/wsbm.1633}, pmid = {37920964}, issn = {2692-9368}, abstract = {Melanoma is one of the leading fatal forms of cancer, yet from a treatment perspective, we have minimal control over its reoccurrence and resistance to current pharmacotherapies. The endocannabinoid system (ECS) has recently been accepted as a multifaceted homeostatic regulator, influencing various physiological processes across different biological compartments, including the skin. This review presents an overview of the pathophysiology of melanoma, current pharmacotherapy used for treatment, and the challenges associated with the different pharmacological approaches. Furthermore, it highlights the utility of cannabinoids as an additive remedy for melanoma by restoring the balance between downregulated immunomodulatory pathways and elevated inflammatory cytokines during chronic skin conditions as one of the suggested critical approaches in treating this immunogenic tumor. This article is categorized under: Cancer > Molecular and Cellular Physiology.}, } @article {pmid37546988, year = {2023}, author = {Pibiri, GE and Fan, J and Patro, R}, title = {Meta-colored compacted de Bruijn graphs.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.07.21.550101}, pmid = {37546988}, abstract = {MOTIVATION: The colored compacted de Bruijn graph (c-dBG) has become a fundamental tool used across several areas of genomics and pangenomics. 
For example, it has been widely adopted by methods that perform read mapping or alignment, abundance estimation, and subsequent downstream analyses. These applications essentially regard the c-dBG as a map from k-mers to the set of references in which they appear. The c-dBG data structure should retrieve this set -- the color of the k-mer -- efficiently for any given k-mer, while using little memory. To aid retrieval, the colors are stored explicitly in the data structure and take considerable space for large reference collections, even when compressed. Reducing the space of the colors is therefore of utmost importance for large-scale sequence indexing.

RESULTS: We describe the meta-colored compacted de Bruijn graph (Mac-dBG) -- a new colored de Bruijn graph data structure where colors are represented holistically, i.e., taking into account their redundancy across the whole collection being indexed, rather than individually as atomic integer lists. This allows the factorization and compression of common sub-patterns across colors. While optimizing the space of our data structure is NP-hard, we propose a simple heuristic algorithm that yields practically good solutions. Results show that the Mac-dBG data structure improves substantially over the best previous space/time trade-off, by providing remarkably better compression effectiveness for the same (or better) query efficiency. This improved space/time trade-off is robust across different datasets and query workloads. Code availability: A C++17 implementation of the Mac-dBG is publicly available on GitHub at: https://github.com/jermp/fulgor.}, } @article {pmid37918082, year = {2023}, author = {Zhuang, Z and Cheng, YY and Deng, J and Cai, Z and Zhong, L and Qu, JX and Wang, K and Yang, L}, title = {Genomic insights into the phage-defense systems of Stenotrophomonas maltophilia clinical isolates.}, journal = {Microbiological research}, volume = {278}, number = {}, pages = {127528}, doi = {10.1016/j.micres.2023.127528}, pmid = {37918082}, issn = {1618-0623}, abstract = {Stenotrophomonas maltophilia is a rapidly evolving multidrug-resistant opportunistic pathogen that can cause serious infections in immunocompromised patients. Although phage therapy is one of promising strategies for dealing with MDR bacteria, the main challenges of phage therapeutics include accumulation of phage resistant mutations and acquisition of the phage defense systems. To systematically evaluate the impact of (pro)phages in shaping genetic and evolutionary diversity of S. maltophilia, we collected 166 S. 
maltophilia isolates from three hospitals in southern China to analyze its pangenome, virulence factors, prophage regions, and anti-viral immune systems. Pangenome analysis indicated that there are 1328 saturated core genes and 26961 unsaturated accessory genes in the pangenome, suggesting existence of highly variable parts of S. maltophilia genome. The presence of genes in relation to T3SS and T6SS mechanisms suggests the great potential to secrete toxins by the S. maltophilia population, which is contrary to the conventional notion of low-virulence of S. maltophilia. Additionally, we characterized the pan-immune system maps of these clinical isolates against phage infections and revealed the co-harboring of CBASS and anti-CBASS in some strains, suggesting a never-ending arms race and the co-evolutionary dynamic between bacteria and phages. Furthermore, our study predicted 310 prophage regions in S. maltophilia with high genetic diversity. Six viral defense systems were found to be located at specific position of the S. maltophilia prophage genomes, indicating potential evolution of certain site/region similar to bacterial 'defense islands' in prophage. Our study provides novel insights of the S. maltophilia pangenome in relation to phage-defense mechanisms, which extends our understanding of bacterial-phage interactions and might guide the application of phage therapy in combating S. maltophilia infections.}, } @article {pmid37919000, year = {2024}, author = {Mun, SY and Lee, W and Lee, SY and Chang, JY and Chang, HC}, title = {Pediococcus inopinatus with a well-developed CRISPR-Cas system dominates in long-term fermented kimchi, Mukeunji.}, journal = {Food microbiology}, volume = {117}, number = {}, pages = {104385}, doi = {10.1016/j.fm.2023.104385}, pmid = {37919000}, issn = {1095-9998}, abstract = {Kimchi is produced through a low-temperature fermentation without pre-sterilization, resulting in a heterogeneous microbial community. 
As fermentation progresses, dominant lactic acid bacteria (LAB) species emerge and undergo a transition process. In this study, LAB were isolated from Mukeunji, a long-term fermented kimchi that is in the final stage of kimchi fermentation process. It was confirmed, through culture-dependent and independent analysis, as well as metagenome analysis, that Pediococcus inopinatus are generally dominant in long-term fermented kimchi. Comparative analysis of the de novo assembled whole genome of P. inopinatus with other kimchi LAB revealed that this species has a well-developed clustered regularly interspaced short palindromic repeats (CRISPR) system. The CRISPR system of P. inopinatus has an additional copy of the csa3 gene, a transcription factor for cas genes. Indeed, this species not only highly expresses cas1 and cas2, which induce spacer acquisition, but also has many diverse spacers that are actively expressed. These findings indicate that the well-developed CRISPR-Cas system is enabling P. inopinatus to dominate in long-fermented kimchi. Overall, this study revealed that LAB with a robust defense system dominate in the final stage of kimchi fermentation and presented a model for the succession mechanism of kimchi LAB.}, } @article {pmid37917733, year = {2023}, author = {Chinchilla, D and Nieves, C and Gutiérrez, R and Sordoillet, V and Veyrier, FJ and Picardeau, M}, title = {Phylogenomics of Leptospira santarosai, a prevalent pathogenic species in the Americas.}, journal = {PLoS neglected tropical diseases}, volume = {17}, number = {11}, pages = {e0011733}, doi = {10.1371/journal.pntd.0011733}, pmid = {37917733}, issn = {1935-2735}, abstract = {BACKGROUND: Leptospirosis is a complex zoonotic disease mostly caused by a group of eight pathogenic species (L. interrogans, L. borgpetersenii, L. kirschneri, L. mayottensis, L. noguchii, L. santarosai, L. weilii, L. alexanderi), with a wide spectrum of animal reservoirs and patient outcomes. 
Leptospira interrogans is considered as the leading causative agent of leptospirosis worldwide and it is the most studied species. However, the genomic features and phylogeography of other Leptospira pathogenic species remain to be determined.

Here we investigated the genome diversity of the main pathogenic Leptospira species based on a collection of 914 genomes from strains isolated around the world. Genome analyses revealed species-specific genome size and GC content, and an open pangenome in the pathogenic species, except for L. mayottensis. Taking advantage of a new set of genomes of L. santarosai strains isolated from patients in Costa Rica, we took a closer look at this species. L. santarosai strains are largely distributed in America, including the Caribbean islands, with over 96% of the available genomes originating from this continent. Phylogenetic analysis showed high genetic diversity within L. santarosai, and the clonal groups identified by cgMLST were strongly associated with geographical areas. Serotype identification based on serogrouping and/or analysis of the O-antigen biosynthesis gene loci further confirmed the great diversity of strains within the species.

CONCLUSIONS/SIGNIFICANCE: In conclusion, we report a comprehensive genome analysis of pathogenic Leptospira species with a focus on L. santarosai. Our study sheds new light onto the genomic diversity, evolutionary history, and epidemiology of leptospirosis in America and globally. Our findings also expand our knowledge of the genes driving O-antigen diversity. In addition, our work provides a framework for understanding the virulence and spread of L. santarosai and for improving its surveillance in both humans and animals.}, } @article {pmid37914227, year = {2023}, author = {Li, Z and Liu, X and Wang, C and Li, Z and Jiang, B and Zhang, R and Tong, L and Qu, Y and He, S and Chen, H and Mao, Y and Li, Q and Pook, T and Wu, Y and Zan, Y and Zhang, H and Li, L and Wen, K and Chen, Y}, title = {The pig pangenome provides insights into the roles of coding structural variations in genetic diversity and adaptation.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.277638.122}, pmid = {37914227}, issn = {1549-5469}, abstract = {Structural variations have emerged as an important driving force for genome evolution and phenotypic variation in various organisms, yet their contributions to genetic diversity and adaptation in domesticated animals remain largely unknown. Here we constructed a pangenome based on 250 sequenced individuals from 32 pig breeds in Eurasia and systematically characterized coding sequence presence/absence variations (PAVs) within pigs. We identified 308.3-Mb nonreference sequences and 3438 novel genes absent from the current reference genome. Gene PAV analysis showed that 16.8% of the genes in the pangene catalog undergo PAV. A number of newly identified dispensable genes showed close associations with adaptation. 
For instance, several novel swine leukocyte antigen (SLA) genes discovered in nonreference sequences potentially participate in immune responses to porcine reproductive and respiratory syndrome virus (PRRSV) infection. We delineated previously unidentified features of the pig mobilome that contained 490,480 transposable element insertion polymorphisms (TIPs) resulting from recent mobilization of 970 TE families, and investigated their population dynamics along with influences on population differentiation and gene expression. In addition, several candidate adaptive TE insertions were detected to be co-opted into genes responsible for responses to hypoxia, skeletal development, regulation of heart contraction, and neuronal cell development, likely contributing to local adaptation of Tibetan wild boars. These findings enhance our understanding on hidden layers of the genetic diversity in pigs and provide novel insights into the role of SVs in the evolutionary adaptation of mammals.}, } @article {pmid37910550, year = {2023}, author = {Angelo, L and Vaillant, A and Blanchet, M and Labonté, P}, title = {Pangenomic antiviral effect of REP 2139 in CRISPR/Cas9 engineered cell lines expressing hepatitis B virus surface antigen.}, journal = {PloS one}, volume = {18}, number = {11}, pages = {e0293167}, doi = {10.1371/journal.pone.0293167}, pmid = {37910550}, issn = {1932-6203}, abstract = {Chronic hepatitis B remains a global health problem with 296 million people living with chronic HBV infection and being at risk of developing cirrhosis and hepatocellular carcinoma. Non-infectious subviral particles (SVP) are produced in large excess over infectious Dane particles in patients and are the major source of Hepatitis B surface antigen (HBsAg). They are thought to exhaust the immune system, and it is generally considered that functional cure requires the clearance of HBsAg from blood of patient. 
Nucleic acid polymers (NAPs) antiviral activity leads to the inhibition of HBsAg release, resulting in rapid clearance of HBsAg from circulation in vivo. However, their efficacy has only been demonstrated in limited genotypes in small scale clinical trials. HBV exists as nine main genotypes (A to I). In this study, the HBsAg ORFs from the most prevalent genotypes (A, B, C, D, E, G), which account for over 96% of human cases, were inserted into the AAVS1 safe-harbor of HepG2 cells using CRISPR/Cas9 knock-in. A cell line producing the D144A vaccine escape mutant was also engineered. The secretion of HBsAg was confirmed in these new genotype cell lines (GCLs) and the antiviral activity of the NAP REP 2139 was then assessed. The results demonstrate that REP 2139 exerts an antiviral effect in all genotypes and serotypes tested in this study, including the vaccine escape mutant, suggesting a pangenomic effect of the NAPs.}, } @article {pmid37910167, year = {2023}, author = {English, J and Newberry, F and Hoyles, L and Patrick, S and Stewart, L}, title = {Genomic analyses of Bacteroides fragilis: subdivisions I and II represent distinct species.}, journal = {Journal of medical microbiology}, volume = {72}, number = {11}, pages = {}, doi = {10.1099/jmm.0.001768}, pmid = {37910167}, issn = {1473-5644}, abstract = {Introduction. Bacteroides fragilis is a Gram-negative anaerobe that is a member of the human gastrointestinal microbiota and is frequently found as an extra-intestinal opportunistic pathogen. B. fragilis comprises two distinct groups - divisions I and II - characterized by the presence/absence of genes [cepA and ccrA (cfiA), respectively] that confer resistance to β-lactam antibiotics by either serine or metallo-β-lactamase production. No large-scale analyses of publicly available B. fragilis sequence data have been undertaken, and the resistome of the species remains poorly defined. Hypothesis/Gap Statement. Reclassification of divisions I and II B. 
fragilis as two distinct species has been proposed but additional evidence is required. Aims. To investigate the genomic diversity of GenBank B. fragilis genomes and establish the prevalence of division I and II strains among publicly available B. fragilis genomes, and to generate further evidence to demonstrate that B. fragilis division I and II strains represent distinct genomospecies. Methodology. High-quality (n=377) genomes listed as B. fragilis in GenBank were included in pangenome and functional analyses. Genome data were also subject to resistome profiling using The Comprehensive Antibiotic Resistance Database. Results. Average nucleotide identity and phylogenetic analyses showed B. fragilis divisions I and II represent distinct species: B. fragilis sensu stricto (n=275 genomes) and B. fragilis A (n=102 genomes; Genome Taxonomy Database designation), respectively. Exploration of the pangenome of B. fragilis sensu stricto and B. fragilis A revealed separation of the two species at the core and accessory gene levels. Conclusion. The findings indicate that B. fragilis A, previously referred to as division II B. fragilis, is an individual species and distinct from B. fragilis sensu stricto. The B. fragilis pangenome analysis supported previous genomic, phylogenetic and resistome screening analyses collectively reinforcing that divisions I and II are two separate species. In addition, it was confirmed that differences in the accessory genes of B. fragilis divisions I and II are primarily associated with carbohydrate metabolism and suggest that differences other than antimicrobial resistance could also be used to distinguish between these two species.}, } @article {pmid37907856, year = {2023}, author = {Hodgeman, R and Mann, R and Djitro, N and Savin, K and Rochfort, S and Rodoni, B}, title = {The pan-genome of Mycobacterium avium subsp. 
paratuberculosis (Map) confirms ancestral lineage and reveals gene rearrangements within Map Type S.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {656}, pmid = {37907856}, issn = {1471-2164}, abstract = {BACKGROUND: To date genomic studies on Map have concentrated on Type C strains with only a few Type S strains included for comparison. In this study the entire pan-genome of 261 Map genomes (205 Type C, 52 Type S and 4 Type B) and 7 Mycobacterium avium complex (Mac) genomes were analysed to identify genomic similarities and differences between the strains and provide more insight into the evolutionary relationship within this Mycobacterial species.

RESULTS: Our analysis of the core genome of all the Map isolates identified two distinct lineages, Type S and Type C Map that is consistent with previous phylogenetic studies of Map. Pan-genome analysis revealed that Map has a larger accessory genome than Mycobacterium avium subsp. avium (Maa) and Type C Map has a larger accessory genome than Type S Map. In addition, we found large rearrangements within Type S strains of Map and little to none in Type C and Type B strains. There were 50 core genes identified that were unique to Type S Map and there were no unique core genes identified between Type B and Type C Map strains. In Type C Map we identified an additional CE10 CAZyme class which was identified as an alpha/beta hydrolase and an additional polyketide and non-ribosomal peptide synthetase cluster. Consistent with previous analysis no plasmids and only incomplete prophages were identified in the genomes of Map. There were 45 hypothetical CRISPR elements identified with no associated cas genes.

CONCLUSION: This is the most comprehensive comparison of the genomic content of Map isolates to date and included the closing of eight Map genomes. The analysis revealed that there is greater variation in gene synteny within Type S strains when compared to Type C indicating that the Type C Map strain emerged after Type S. Further analysis of Type C and Type B genomes revealed that they are structurally similar with little to no genetic variation and that Type B Map may be a distinct clade within Type C Map and not a different strain type of Map. The evolutionary lineage of Maa and Map was confirmed as emerging after M. hominissuis.}, } @article {pmid37904249, year = {2023}, author = {Manzano-Morales, S and Liu, Y and González-Bodí, S and Huerta-Cepas, J and Iranzo, J}, title = {Comparison of gene clustering criteria reveals intrinsic uncertainty in pangenome analyses.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {250}, pmid = {37904249}, issn = {1474-760X}, support = {PID2019-106618GA-I00//Agencia Estatal de Investigación/ ; RYC-2017-22524//Agencia Estatal de Investigación/ ; PGC2018-098073-A-I00//Agencia Estatal de Investigación/ ; SEV-2016-0672//Agencia Estatal de Investigación/ ; PID2021-127210NB-I00//Agencia Estatal de Investigación/ ; PTA2021-020636-I//Agencia Estatal de Investigación/ ; M190020074JIIS//Comunidad de Madrid/ ; 202008440425//China Scholarship Council/ ; }, abstract = {BACKGROUND: A key step for comparative genomics is to group open reading frames into functionally and evolutionarily meaningful gene clusters. Gene clustering is complicated by intraspecific duplications and horizontal gene transfers that are frequent in prokaryotes. In consequence, gene clustering methods must deal with a trade-off between identifying vertically transmitted representatives of multicopy gene families, which are recognizable by synteny conservation, and retrieving complete sets of species-level orthologs. 
We studied the implications of adopting homology, orthology, or synteny conservation as formal criteria for gene clustering by performing comparative analyses of 125 prokaryotic pangenomes.

RESULTS: Clustering criteria affect pangenome functional characterization, core genome inference, and reconstruction of ancestral gene content to different extents. Species-wise estimates of pangenome and core genome sizes change by the same factor when using different clustering criteria, allowing robust cross-species comparisons regardless of the clustering criterion. However, cross-species comparisons of genome plasticity and functional profiles are substantially affected by inconsistencies among clustering criteria. Such inconsistencies are driven not only by mobile genetic elements, but also by genes involved in defense, secondary metabolism, and other accessory functions. In some pangenome features, the variability attributed to methodological inconsistencies can even exceed the effect sizes of ecological and phylogenetic variables.

CONCLUSIONS: Choosing an appropriate criterion for gene clustering is critical to conduct unbiased pangenome analyses. We provide practical guidelines to choose the right method depending on the research goals and the quality of genome assemblies, and a benchmarking dataset to assess the robustness and reproducibility of future comparative studies.}, } @article {pmid37902967, year = {2023}, author = {Chandra, G and Jain, C}, title = {Gap-Sensitive Colinear Chaining Algorithms for Acyclic Pangenome Graphs.}, journal = {Journal of computational biology : a journal of computational molecular cell biology}, volume = {}, number = {}, pages = {}, doi = {10.1089/cmb.2023.0186}, pmid = {37902967}, issn = {1557-8666}, abstract = {A pangenome graph can serve as a better reference for genomic studies because it allows a compact representation of multiple genomes within a species. Aligning sequences to a graph is critical for pangenome-based resequencing. The seed-chain-extend heuristic works by finding short exact matches between a sequence and a graph. In this heuristic, colinear chaining helps identify a good cluster of exact matches that can be combined to form an alignment. Colinear chaining algorithms have been extensively studied for aligning two sequences with various gap costs, including linear, concave, and convex cost functions. However, extending these algorithms for sequence-to-graph alignment presents significant challenges. Recently, Makinen et al. introduced a sparse dynamic programming framework that exploits the small path cover property of acyclic pangenome graphs, enabling efficient chaining. However, this framework does not consider gap costs, limiting its practical effectiveness. We address this limitation by developing novel problem formulations and provably good chaining algorithms that support a variety of gap cost functions. 
These functions are carefully designed to enable fast chaining algorithms whose time requirements are parameterized in terms of the size of the minimum path cover. Through an empirical evaluation, we demonstrate the superior performance of our algorithm compared with existing aligners. When mapping simulated long reads to a pangenome graph comprising 95 human haplotypes, we achieved 98.7% precision while leaving <2% of reads unmapped.}, } @article {pmid37897717, year = {2023}, author = {Alsubaiyel, AM and Bukhari, SI}, title = {Computational exploration and design of a multi-epitopes vaccine construct against Chlamydia psittaci.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-17}, doi = {10.1080/07391102.2023.2268173}, pmid = {37897717}, issn = {1538-0254}, abstract = {Chlamydia psittaci is an intracellular pathogen and causes variety of deadly infections in humans. Antibiotics are effective against C. psittaci however high percentage of resistant strains have been reported in recent times. As there is no licensed vaccine, we used in-silico techniques to design a multi-epitopes vaccine against C. psittaci. Following a step-wise protocol, the proteome of available 26 strains was retrieved and filtered for subcellular localized proteins. Five proteins were selected (2 extracellular and 3 outer membrane) and were further analyzed for B-cell and T-cell epitopes prediction. Epitopes were further checked for antigenicity, solubility, stability, toxigenicity, allergenicity, and adhesive properties. Filtered epitopes were linked via linkers and the 3D structure of the designed vaccine construct was predicted. Binding of the designed vaccine with immune receptors: MHC-I, MHC-II, and TLR-4 was analyzed, which resulted in docking energy scores of -4.37 kcal/mol, -0.20 kcal/mol and -22.38 kcal/mol, respectively. 
Further, the docked complexes showed stable dynamics with a maximum value of vaccine-MHC-I complex (7.8 Å), vaccine-MHC-II complex (6.2 Å) and vaccine-TLR4 complex (5.2 Å). As per the results, the designed vaccine construct reported robust immune responses to protect the host against C. psittaci infections. In the study, the C. psittaci proteomes were considered in pan-genome analysis to extract core proteins. The pan-genome analysis was conducted using bacterial pan-genome analysis (BPGA) software. The core proteins were checked further for non-redundant proteins using a CD-Hit server. Surface localized proteins were investigated using PSORTb v 3.0. The surface proteins were BLASTp against Virulence Factor Data Base (VFDB) to predict virulent factors. Antigenicity prediction of the shortlisted proteins was further done using VAXIGEN v 2.0. The epitope mapping was done using the immune epitope database (IEDB). A multi-epitopes vaccine was built and a 3D structure was generated using 3Dprot online server. The docking analysis of the designed vaccine with immune receptors was carried out using PATCHDOCK. Molecular dynamics and post-simulation analyses were carried out using AMBER v20 to decipher the dynamics stability and intermolecular binding energies of the docked complexes. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37897710, year = {2023}, author = {Hamed, SM and Mohamed, HO and Ashour, HM and Fahmy, LI}, title = {Comparative genomic analysis of strong biofilm-forming Klebsiella pneumoniae isolates uncovers novel ISEcp1-mediated chromosomal integration of a full plasmid-like sequence.}, journal = {Infectious diseases (London, England)}, volume = {}, number = {}, pages = {1-19}, doi = {10.1080/23744235.2023.2272624}, pmid = {37897710}, issn = {2374-4243}, abstract = {BACKGROUND: The goal of the current study was to elucidate the genomic background of biofilm formation in Klebsiella pneumoniae.

METHODS: Clinical isolates were screened for biofilm formation using the crystal violet assay. Antimicrobial resistance (AMR) profiles were assessed by disk diffusion and broth microdilution tests. Biofilm formation was correlated to virulence and resistance genes screened by PCR. Draft genomes of three isolates that form strong biofilm were generated by Illumina sequencing.

RESULTS: Only the siderophore-coding gene iutA was significantly associated with more pronounced biofilm formation. ST1399-KL43-O1/O2v1 and ST11-KL15-O4 were assigned to the multidrug-resistant strain K21 and the extensively drug-resistant strain K237, respectively. ST1999-KL38-O12 was assigned to K57. Correlated with CRISPR/Cas distribution, more plasmid replicons and prophage sequences were identified in K21 and K237 compared to K57. The acquired AMR genes (blaOXA-48, rmtF, aac(6')-Ib and qnrB) and (blaNDM-1, blaCTX-M, aph(3')-VI, qnrS, and aac(6')-Ib-cr) were found in K237 and K21, respectively. The latter showed a novel ISEcp1-mediated chromosomal integration of replicon type IncM1 plasmid-like structure harboring blaCTX-M-14 and aph(3')-VI that uniquely interrupted rcsC. The plasmid-mediated heavy metal resistance genes merACDEPRT and arsABCDR were spotted in K21, which also exclusively carried the acquired virulence genes mrkABCDF and the hypervirulence-associated genes iucABCD-iutA, and rmpA/A2. Pangenome analysis revealed NTUH-K2044 accessory genes most frequently shared with K21.

CONCLUSIONS: While less virulent to Galleria mellonella than ST1999 (K57), the strong biofilm former, multidrug-resistant, NDM-producer K. pneumoniae K21 (ST1399-KL43-O1/O2v1) carries a novel chromosomally integrated plasmid-like structure and hypervirulence-associated genes and represents a serious threat to countries in the area.}, } @article {pmid37897361, year = {2023}, author = {Hu, R and Li, F and Chen, Y and Liu, C and Li, J and Ma, Z and Wang, Y and Cui, C and Luo, C and Zhou, P and Ni, W and Yang, QY and Hu, S}, title = {AnimalMetaOmics: a multi-omics data resources for exploring animal microbial genomes and microbiomes.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad931}, pmid = {37897361}, issn = {1362-4962}, support = {2021ZD01//Foundation of state key laboratory of sheep genetic improvement and healthy production/ ; //Tianshan Talent Project/ ; 2022xjkk1202//The Third Xinjiang Scientific Expedition Program/ ; }, abstract = {The Animal Meta-omics landscape database (AnimalMetaOmics, https://yanglab.hzau.edu.cn/animalmetaomics#/) is a comprehensive and freely available resource that includes metagenomic, metatranscriptomic, and metaproteomic data from various non-human animal species and provides abundant information on animal microbiomes, including cluster analysis of microbial cognate genes, functional gene annotations, active microbiota composition, gene expression abundance, and microbial protein identification. In this work, 55 898 microbial genomes were annotated from 581 animal species, including 42 924 bacterial genomes, 12 336 virus genomes, 496 archaea genomes and 142 fungi genomes. Moreover, 321 metatranscriptomic datasets were analyzed from 31 animal species and 326 metaproteomic datasets from four animal species, as well as the pan-genomic dynamics and compositional characteristics of 679 bacterial species and 13 archaea species from animal hosts. 
Researchers can efficiently access and acquire the information of cross-host microbiota through a user-friendly interface, such as species, genomes, activity levels, expressed protein sequences and functions, and pan-genome composition. These valuable resources provide an important reference for better exploring the classification, functional diversity, biological process diversity and functional genes of animal microbiota.}, } @article {pmid37897345, year = {2023}, author = {Dimonaco, NJ and Clare, A and Kenobi, K and Aubrey, W and Creevey, CJ}, title = {StORF-Reporter: finding genes between genes.}, journal = {Nucleic acids research}, volume = {}, number = {}, pages = {}, doi = {10.1093/nar/gkad814}, pmid = {37897345}, issn = {1362-4962}, support = {//Aberystwyth University/ ; //McMaster University/ ; //Weston Family Microbiome Initiative/ ; BB/E/W/10964A01//BBSRC/ ; R3192GFS//DAFM Ireland/DAERA Northern Ireland/ ; 818368//Horizon 2020/ ; }, abstract = {Large regions of prokaryotic genomes are currently without any annotation, in part due to well-established limitations of annotation tools. For example, it is routine for genes using alternative start codons to be misreported or completely omitted. Therefore, we present StORF-Reporter, a tool that takes an annotated genome and returns regions that may contain missing CDS genes from unannotated regions. StORF-Reporter consists of two parts. The first begins with the extraction of unannotated regions from an annotated genome. Next, Stop-ORFs (StORFs) are identified in these unannotated regions. StORFs are open reading frames that are delimited by stop codons and thus can capture those genes most often missing in genome annotations. We show this methodology recovers genes missing from canonical genome annotations. We inspect the results of the genomes of model organisms, the pangenome of Escherichia coli, and a set of 5109 prokaryotic genomes of 247 genera from the Ensembl Bacteria database. 
StORF-Reporter extended the core, soft-core and accessory gene collections, identified novel gene families and extended families into additional genera. The high levels of sequence conservation observed between genera suggest that many of these StORFs are likely to be functional genes that should now be considered for inclusion in canonical annotations.}, } @article {pmid37894252, year = {2023}, author = {Kurihara, MNL and Santos, INM and Eisen, AKA and Caleiro, GS and Araújo, J and Sales, RO and Pignatari, AC and Salles, MJ}, title = {Phenotypic and Genotypic Characterization of Cutibacterium acnes Isolated from Shoulder Surgery Reveals Insights into Genetic Diversity.}, journal = {Microorganisms}, volume = {11}, number = {10}, pages = {}, doi = {10.3390/microorganisms11102594}, pmid = {37894252}, issn = {2076-2607}, support = {88887.500796/2020-00//Coordenação de Aperfeicoamento de Pessoal de Nível Superior/ ; 88887.627094/2021-00//Coordenação de Aperfeicoamento de Pessoal de Nível Superior/ ; }, abstract = {Specific virulence factors that likely influence C. acnes invasion into deep tissues remain to be elucidated. Herein, we describe the frequency of C. acnes identification in deep tissue specimens of patients undergoing clean shoulder surgery and assess its phenotypic and genetic traits associated with virulence and antibiotic resistance patterns, compared with isolates from the skin of healthy volunteers. Multiple deep tissue specimens from the bone fragments, tendons, and bursa of 84 otherwise healthy patients undergoing primary clean-open and arthroscopic shoulder surgeries were aseptically collected. The overall yield of tissue sample cultures was 21.5% (55/255), with 11.8% (30/255) identified as C. acnes in 27.3% (23/84) of patients. Antibiotic resistance rates were low, with most strains expressing susceptibility to first-line antibiotics, while a few were resistant to penicillin and rifampicin. 
Phylotypes IB (73.3%) and II (23.3%) were predominant in deep tissue samples. Genomic analysis demonstrated differences in the pangenome of the isolates from the same clade. Even though strains displayed a range of pathogenic markers, such as biofilm formation, patients did not evolve to infection during the 1-year follow-up. This suggests that the presence of polyclonal C. acnes in multiple deep tissue samples does not necessarily indicate infection.}, } @article {pmid37894121, year = {2023}, author = {Nedashkovskaya, O and Otstavnykh, N and Balabanova, L and Bystritskaya, E and Kim, SG and Zhukova, N and Tekutyeva, L and Isaeva, M}, title = {Rhodoalgimonas zhirmunskyi gen. nov., sp. nov., a Marine Alphaproteobacterium Isolated from the Pacific Red Alga Ahnfeltia tobuchiensis: Phenotypic Characterization and Pan-Genome Analysis.}, journal = {Microorganisms}, volume = {11}, number = {10}, pages = {}, doi = {10.3390/microorganisms11102463}, pmid = {37894121}, issn = {2076-2607}, support = {15.BRK.21.0004 (Contract No. 075-15-2021-1052)//the Ministry of Science and Higher Education, Russian Federation/ ; }, abstract = {A novel Gram-staining negative, strictly aerobic, rod-shaped, and non-motile bacterium, designated strain 10Alg 79[T], was isolated from the red alga Ahnfeltia tobuchiensis. A phylogenetic analysis based on 16S rRNA gene sequences placed the novel strain within the family Roseobacteraceae, class Alphaproteobacteria, phylum Pseudomonadota, where the nearest neighbor was Shimia sediminis ZQ172[T] (97.33% of identity). However, a phylogenomic study clearly showed that strain 10Alg 79[T] forms a distinct evolutionary lineage at the genus level within the family Roseobacteraceae combining with strains Aquicoccus porphyridii L1 8-17[T], Marimonas arenosa KCTC 52189[T], and Lentibacter algarum DSM 24677[T]. The ANI, AAI, and dDDH values between them were 75.63-78.15%, 67.41-73.08%, and 18.8-19.8%, respectively. 
The genome comprises 3,754,741 bp with a DNA GC content of 62.1 mol%. The prevalent fatty acids of strain 10Alg 79[T] were C18:1 ω7c and C16:0. The polar lipid profile consisted of phosphatidylethanolamine, phosphatidylglycerol, phosphatidylcholine, an unidentified aminolipid, an unidentified phospholipid and an unidentified lipid. A pan-genome analysis showed that the unique part of the 10Alg 79[T] genome consists of 13 genus-specific clusters and 413 singletons. The annotated singletons were more often related to transport protein systems, transcriptional regulators, and enzymes. A functional annotation of the draft genome sequence revealed that this bacterium could be a source of a new phosphorylase, which may be used for phosphoglycoside synthesis. A combination of the genotypic and phenotypic data showed that the bacterial isolate represents a novel species and a novel genus, for which the name Rhodoalgimonas zhirmunskyi gen. nov., sp. nov. is proposed. The type strain is 10Alg 79[T] (=KCTC 72611[T] = KMM 6723[T]).}, } @article {pmid37894103, year = {2023}, author = {Covas, C and Figueiredo, G and Gomes, M and Santos, T and Mendo, S and Caetano, TS}, title = {The Pangenome of Gram-Negative Environmental Bacteria Hides a Promising Biotechnological Potential.}, journal = {Microorganisms}, volume = {11}, number = {10}, pages = {}, doi = {10.3390/microorganisms11102445}, pmid = {37894103}, issn = {2076-2607}, support = {SFRH/BD/98446/2013//Fundação para a Ciência e Tecnologia/ ; CEECIND/01463/2017//Fundação para a Ciência e Tecnologia/ ; UIDP/50017/2020+UIDB/50017/2020+LA/P/0094/2020//Fundação para a Ciência e Tecnologia/ ; }, abstract = {Secondary metabolites (SMs) from environmental bacteria offer viable solutions for various health and environmental challenges. Researchers are employing advanced bioinformatic tools to investigate less-explored microorganisms and unearth novel bioactive compounds. 
In this research area, our understanding of SMs from environmental Gram-negative bacteria lags behind that of its Gram-positive counterparts. In this regard, Pedobacter spp. have recently gained attention, not only for their role as plant growth promoters but also for their potential in producing antimicrobials. This study focuses on the genomic analysis of Pedobacter spp. to unveil the diversity of the SMs encoded in their genomes. Among the 41 genomes analyzed, a total of 233 biosynthetic gene clusters (BGCs) were identified, revealing the potential for the production of diverse SMs, including RiPPs (27%), terpenes (22%), hybrid SMs (17%), PKs (12%), NRPs (9%) and siderophores (6%). Overall, BGC distribution did not correlate with phylogenetic lineage and most of the BGCs showed no significant hits in the MIBiG database, emphasizing the uniqueness of the compounds that Pedobacter spp. can produce. Of all the species examined, P. cryoconitis and P. lusitanus stood out for having the highest number and diversity of BGCs. Focusing on their applicability and ecological functions, we investigated in greater detail the BGCs responsible for siderophore and terpenoid production in these species and their relatives. Our findings suggest that P. cryoconitis and P. lusitanus have the potential to produce novel mixtures of siderophores, involving bifunctional IucAC/AcD NIS synthetases, as well as carotenoids and squalene. This study highlights the biotechnological potential of Pedobacter spp. 
in medicine, agriculture and other industries, emphasizing the need for a continued exploration of its SMs and their applications.}, } @article {pmid37894090, year = {2023}, author = {Alghamdi, M and Al-Judaibi, E and Al-Rashede, M and Al-Judaibi, A}, title = {Comparative De Novo and Pan-Genome Analysis of MDR Nosocomial Bacteria Isolated from Hospitals in Jeddah, Saudi Arabia.}, journal = {Microorganisms}, volume = {11}, number = {10}, pages = {}, doi = {10.3390/microorganisms11102432}, pmid = {37894090}, issn = {2076-2607}, abstract = {Multidrug-resistant (MDR) bacteria are one of the most serious threats to public health, and one of the most important types of MDR bacteria are those that are acquired in a hospital, known as nosocomial. This study aimed to isolate and identify MDR bacteria from selected hospitals in Jeddah and analyze their antibiotic-resistant genes. Bacteria were collected from different sources and wards of hospitals in Jeddah City. Phoenix BD was used to identify the strains and perform susceptibility testing. Identification of selected isolates showing MDR to more than three classes on antibiotics was based on 16S rRNA gene and whole genome sequencing. Genes conferring resistance were characterized using de novo and pan-genome analyses. In total, we isolated 108 bacterial strains, of which 75 (69.44%) were found to be MDR. Taxonomic identification revealed that 24 (32%) isolates were identified as Escherichia coli, 19 (25.3%) corresponded to Klebsiella pneumoniae, and 17 (22.67%) were methicillin-resistant Staphylococcus aureus (MRSA). Among the Gram-negative bacteria, K. pneumoniae isolates showed the highest resistance levels to most antibiotics. Of the Gram-positive bacteria, S. aureus (MRSA) strains were noticed to exhibit the uppermost degree of resistance to the tested antibiotics, which is higher than that observed for K. pneumoniae isolates. 
Taken together, our results illustrated that MDR Gram-negative bacteria are the most common cause of nosocomial infections, while MDR Gram-positive bacteria are characterized by a wider antibiotic resistance spectrum. Whole genome sequencing found the appearance of antibiotic resistance genes, including SHV, OXA, CTX-M, TEM-1, NDM-1, VIM-1, ere(A), ermA, ermB, ermC, msrA, qacA, qacB, and qacC.}, } @article {pmid37892197, year = {2023}, author = {Yaraguppi, DA and Bagewadi, ZK and Patil, NR and Mantri, N}, title = {Iturin: A Promising Cyclic Lipopeptide with Diverse Applications.}, journal = {Biomolecules}, volume = {13}, number = {10}, pages = {}, doi = {10.3390/biom13101515}, pmid = {37892197}, issn = {2218-273X}, abstract = {This comprehensive review examines iturin, a cyclic lipopeptide originating from Bacillus subtilis and related bacteria. These compounds are structurally diverse and possess potent inhibitory effects against plant disease-causing bacteria and fungi. Notably, Iturin A exhibits strong antifungal properties and low toxicity, making it valuable for bio-pesticides and mycosis treatment. Emerging research reveals additional capabilities, including anticancer and hemolytic features. Iturin finds applications across industries. In food, iturin as a biosurfactant serves beyond surface tension reduction, enhancing emulsions and texture. Biosurfactants are significant in soil remediation, agriculture, wound healing, and sustainability. They also show promise in Microbial Enhanced Oil Recovery (MEOR) in the petroleum industry. The pharmaceutical and cosmetic industries recognize iturin's diverse properties, such as antibacterial, antifungal, antiviral, anticancer, and anti-obesity effects. Cosmetic applications span emulsification, anti-wrinkle, and antibacterial use. Understanding iturin's structure, synthesis, and applications gains importance as biosurfactant and lipopeptide research advances. 
This review focuses on emphasizing iturin's structural characteristics, production methods, biological effects, and applications across industries. It probes iturin's antibacterial, antifungal potential, antiviral efficacy, and cancer treatment capabilities. It explores diverse applications in food, petroleum, pharmaceuticals, and cosmetics, considering recent developments, challenges, and prospects.}, } @article {pmid37891426, year = {2023}, author = {Gould, AL and Donohoo, SA and Román, ED and Neff, EE}, title = {Strain-level diversity of symbiont communities between individuals and populations of a bioluminescent fish.}, journal = {The ISME journal}, volume = {}, number = {}, pages = {}, pmid = {37891426}, issn = {1751-7370}, abstract = {The bioluminescent symbiosis involving the urchin cardinalfish, Siphamia tubifer, and Photobacterium mandapamensis, a luminous member of the Vibrionaceae, is highly specific compared to other bioluminescent fish-bacteria associations. Despite this high degree of specificity, patterns of genetic diversity have been observed for the symbionts from hosts sampled over relatively small spatial scales. We characterized and compared sub-species, strain-level symbiont diversity within and between S. tubifer hosts sampled from the Philippines and Japan using PCR fingerprinting. We then carried out whole genome sequencing of the unique symbiont genotypes identified to characterize the genetic diversity of the symbiont community and the symbiont pangenome. We determined that an individual light organ contains six symbiont genotypes on average, but varied between 1-13. Additionally, we found that there were few genotypes shared between hosts from the same location. A phylogenetic analysis of the unique symbiont strains indicated location-specific clades, suggesting some genetic differentiation in the symbionts between host populations. 
We also identified symbiont genes that were variable between strains, including luxF, a member of the lux operon, which is responsible for light production. We quantified the light emission and growth rate of two strains missing luxF along with the other strains isolated from the same light organs and determined that strains lacking luxF were dimmer but grew faster than most of the other strains, suggesting a potential metabolic trade-off. This study highlights the importance of strain-level diversity in microbial associations and provides new insight into the underlying genetic architecture of intraspecific symbiont communities within a host.}, } @article {pmid37887294, year = {2023}, author = {Bachari, A and Nassar, N and Telukutla, S and Zomer, R and Dekiwadia, C and Piva, TJ and Mantri, N}, title = {In Vitro Antiproliferative Effect of Cannabis Extract PHEC-66 on Melanoma Cell Lines.}, journal = {Cells}, volume = {12}, number = {20}, pages = {}, doi = {10.3390/cells12202450}, pmid = {37887294}, issn = {2073-4409}, support = {Not Applicable//MGC Pharmaceuticals Ltd/ ; }, abstract = {Melanoma, an aggressive form of skin cancer, can be fatal if not diagnosed and treated early. Melanoma is widely recognized to resist advanced cancer treatments, including immune checkpoint inhibitors, kinase inhibitors, and chemotherapy. Numerous studies have shown that various Cannabis sativa extracts exhibit potential anticancer effects against different types of tumours both in vitro and in vivo. This study is the first to report that PHEC-66, a Cannabis sativa extract, displays antiproliferative effects against MM418-C1, MM329 and MM96L melanoma cells. 
Although these findings suggest that PHEC-66 has promising potential as a pharmacotherapeutic agent for melanoma treatment, further research is necessary to evaluate its safety, efficacy, and clinical applications.}, } @article {pmid37884897, year = {2023}, author = {Depuydt, L and Renders, L and Abeel, T and Fostier, J}, title = {Pan-genome de Bruijn graph using the bidirectional FM-index.}, journal = {BMC bioinformatics}, volume = {24}, number = {1}, pages = {400}, pmid = {37884897}, issn = {1471-2105}, support = {1117322N//Fonds Wetenschappelijk Onderzoek/ ; 1SE7822N//Fonds Wetenschappelijk Onderzoek/ ; }, abstract = {BACKGROUND: Pan-genome graphs are gaining importance in the field of bioinformatics as data structures to represent and jointly analyze multiple genomes. Compacted de Bruijn graphs are inherently suited for this purpose, as their graph topology naturally reveals similarity and divergence within the pan-genome. Most state-of-the-art pan-genome graphs are represented explicitly in terms of nodes and edges. Recently, an alternative, implicit graph representation was proposed that builds directly upon the unidirectional FM-index. As such, a memory-efficient graph data structure is obtained that inherits the FM-index' backward search functionality. However, this representation suffers from a number of shortcomings in terms of functionality and algorithmic performance.

RESULTS: We present a data structure for a pan-genome, compacted de Bruijn graph that aims to address these shortcomings. It is built on the bidirectional FM-index, extending the ability of its unidirectional counterpart to navigate and search the graph in both directions. All basic graph navigation steps can be performed in constant time. Based on these features, we implement subgraph visualization as well as lossless approximate pattern matching to the graph using search schemes. We demonstrate that we can retrieve all occurrences corresponding to a read within a certain edit distance in a very efficient manner. Through a case study, we show the potential of exploiting the information embedded in the graph's topology through visualization and sequence alignment.

CONCLUSIONS: We propose a memory-efficient representation of the pan-genome graph that supports subgraph visualization and lossless approximate pattern matching of reads against the graph using search schemes. The C++ source code of our software, called Nexus, is available at https://github.com/biointec/nexus under AGPL-3.0 license.}, } @article {pmid37882557, year = {2023}, author = {Hoover, RL and Keffer, JL and Polson, SW and Chan, CS}, title = {Gallionellaceae pangenomic analysis reveals insight into phylogeny, metabolic flexibility, and iron oxidation mechanisms.}, journal = {mSystems}, volume = {}, number = {}, pages = {e0003823}, doi = {10.1128/msystems.00038-23}, pmid = {37882557}, issn = {2379-5077}, abstract = {The iron-oxidizing Gallionellaceae drive a wide variety of biogeochemical cycles through their metabolisms and biominerals. To better understand the environmental impacts of Gallionellaceae, we need to improve our knowledge of their diversity and metabolisms, especially any novel iron oxidation mechanisms. Here, we used a pangenomic analysis of 103 genomes to resolve Gallionellaceae phylogeny and explore their genomic potential. Using a concatenated ribosomal protein tree and key gene patterns, we determined Gallionellaceae has four genera, divided into two groups: iron-oxidizing bacteria (FeOB) Gallionella, Sideroxydans, and Ferriphaselus with iron oxidation genes (cyc2, mtoA) and nitrite-oxidizing bacteria (NOB) Candidatus Nitrotoga with the nitrite oxidase gene nxr. The FeOB and NOB have similar electron transport chains, including genes for reverse electron transport and carbon fixation. Auxiliary energy metabolisms, including S oxidation, denitrification, and organotrophy, were scattered throughout the FeOB. Within FeOB, we found genes that may represent adaptations for iron oxidation, including a variety of extracellular electron uptake mechanisms. 
FeOB genomes encoded more predicted c-type cytochromes than NOB genomes, notably more multiheme c-type cytochromes (MHCs) with >10 CXXCH motifs. These include homologs of several predicted outer membrane porin-MHC complexes, including MtoAB and Uet. MHCs efficiently conduct electrons across longer distances and function across a wide range of redox potentials that overlap with mineral redox potentials, which can expand the range of usable iron substrates. Overall, the results of pangenome analyses suggest that the Gallionellaceae genera Gallionella, Sideroxydans, and Ferriphaselus have acquired a range of adaptations to succeed in various environments but are primarily iron oxidizers. IMPORTANCE: Neutrophilic iron-oxidizing bacteria (FeOB) produce copious iron (oxyhydr)oxides that can profoundly influence biogeochemical cycles, notably the fate of carbon and many metals. To fully understand environmental microbial iron oxidation, we need a thorough accounting of iron oxidation mechanisms. In this study, we show the Gallionellaceae FeOB genomes encode both characterized iron oxidases as well as uncharacterized multiheme cytochromes (MHCs). MHCs are predicted to transfer electrons from extracellular substrates and likely confer metabolic capabilities that help Gallionellaceae occupy a range of different iron- and mineral-rich niches. Gallionellaceae appear to specialize in iron oxidation, so it would be advantageous for them to have multiple mechanisms to oxidize various forms of iron, given the many iron minerals on Earth, as well as the physiological and kinetic challenges faced by FeOB. 
The multiple iron/mineral oxidation mechanisms may help drive the widespread ecological success of Gallionellaceae.}, } @article {pmid37882526, year = {2023}, author = {Pérez Castro, S and Peredo, EL and Mason, OU and Vineis, J and Bowen, JL and Mortazavi, B and Ganesh, A and Ruff, SE and Paul, BG and Giblin, AE and Cardon, ZG}, title = {Diversity at single nucleotide to pangenome scales among sulfur cycling bacteria in salt marshes.}, journal = {Applied and environmental microbiology}, volume = {}, number = {}, pages = {e0098823}, doi = {10.1128/aem.00988-23}, pmid = {37882526}, issn = {1098-5336}, abstract = {Sulfur-cycling microbial communities in salt marsh rhizosphere sediments mediate a recycling and detoxification system central to plant productivity. Despite the importance of sulfur-cycling microbes, their biogeographic, phylogenetic, and functional diversity remain poorly understood. Here, we use metagenomic data sets from Massachusetts (MA) and Alabama (AL) salt marshes to examine the distribution and genomic diversity of sulfur-cycling plant-associated microbes. Samples were collected from sediments under Sporobolus alterniflorus and Sporobolus pumilus in separate MA vegetation zones, and under S. alterniflorus and Juncus roemerianus co-occurring in AL. We grouped metagenomic data by plant species and site and identified 38 MAGs that included pathways for sulfate reduction or sulfur oxidation. Phylogenetic analyses indicated that 29 of the 38 were affiliated with uncultivated lineages. We showed differentiation in the distribution of MAGs between AL and MA, between S. alterniflorus and S. pumilus vegetation zones in MA, but no differentiation between S. alterniflorus and J. roemerianus in AL. Pangenomic analyses of eight ubiquitous MAGs also detected site- and vegetation-specific genomic features, including varied sulfur-cycling operons, carbon fixation pathways, fixed single-nucleotide variants, and active diversity-generating retroelements. 
This genetic diversity, detected at multiple scales, suggests evolutionary relationships affected by distance and local environment, and demonstrates differential microbial capacities for sulfur and carbon cycling in salt marsh sediments. IMPORTANCE: Salt marshes are known for their significant carbon storage capacity, and sulfur cycling is closely linked with the ecosystem-scale carbon cycling in these ecosystems. Sulfate reducers are key for the decomposition of organic matter, and sulfur oxidizers remove toxic sulfide, supporting the productivity of marsh plants. To date, the complexity of coastal environments, heterogeneity of the rhizosphere, high microbial diversity, and uncultured majority hindered our understanding of the genomic diversity of sulfur-cycling microbes in salt marshes. Here, we use comparative genomics to overcome these challenges and provide an in-depth characterization of sulfur-cycling microbial diversity in salt marshes. We characterize communities across distinct sites and plant species and uncover extensive genomic diversity at the taxon level and specific genomic features present in MAGs affiliated with uncultivated sulfur-cycling lineages. Our work provides insights into the partnerships in salt marshes and a roadmap for multiscale analyses of diversity in complex biological systems.}, } @article {pmid37876012, year = {2023}, author = {Yu, J and Jiang, C and Yamano, R and Koike, S and Sakai, Y and Mino, S and Sawabe, T}, title = {Unveiling the early life core microbiome of the sea cucumber Apostichopus japonicus and the unexpected abundance of the growth-promoting Sulfitobacter.}, journal = {Animal microbiome}, volume = {5}, number = {1}, pages = {54}, pmid = {37876012}, issn = {2524-4671}, abstract = {BACKGROUND: Microbiome in early life has long-term effects on the host's immunological and physiological development and its disturbance is known to trigger various diseases in host Deuterostome animals. 
The sea cucumber Apostichopus japonicus is one of the most valuable marine Deuterostome invertebrates in Asia and a model animal in regeneration studies. To understand factors that impact on host development and holobiont maintenance, host-microbiome association has been actively studied in the last decade. However, we currently lack knowledge of early life core microbiome during its ontogenesis and how it benefits the host's growth.

RESULTS: We analyzed the microbial community in 28 sea cucumber samples from a laboratory breeding system, designed to replicate aquaculture environments, across six developmental stages (fertilized eggs to the juvenile stage) over a three-year period to examine the microbiomes' dynamics and stability. Microbiome shifts occurred during sea cucumber larval ontogenesis in every case. Application of the most sophisticated core microbiome extraction methodology, a hybrid approach with abundance-occupancy core microbiome analyses (top 75% of total reads and > 70% occupation) and core index calculation, first revealed early life core microbiome consisted of Alteromonadaceae and Rhodobacteraceae, as well as a stage core microbiome consisting of pioneer core microbe Pseudoalteromonadaceae in A. japonicus, suggesting a stepwise establishment of microbiome related to ontogenesis and feeding behavior in A. japonicus. More interestingly, four ASVs affiliated to Alteromonadaceae and Rhodobacteraceae were extracted as early life core microbiome. One of the ASVs (ASV0007) was affiliated to the Sulfitobacter strain BL28 (Rhodobacteraceae), isolated from blastula larvae in the 2019 rearing batch. Unexpectedly, a bioassay revealed the BL28 strain retains a host growth-promoting ability. Further meta-pangenomics approach revealed the BL28 genome reads were abundant in the metagenomic sequence pool, in particular, in that of post-gut development in early life stages of A. japonicus.

CONCLUSION: Repeated rearing efforts of A. japonicus using laboratory aquaculture replicating aquaculture environments and hybrid core microbiome extraction approach first revealed particular ASVs affiliated to Alteromonadaceae and Rhodobacteraceae as the A. japonicus early life core microbiome. Further bioassay revealed the growth promoting ability to the host sea cucumber in one of the core microbes, the Sulfitobacter strain BL28 identified as ASV0007. Genome reads of the BL28 were abundant in post-gut development of A. japonicus, which makes us consider effective probiotic uses of those core microbiome for sea cucumber resource production and conservation. The study also emphasizes the importance of the core microbiome in influencing early life stages in marine invertebrates. Understanding these dynamics could offer pathways to improve growth, immunity, and disease resistance in marine invertebrates.}, } @article {pmid37873245, year = {2023}, author = {Islam, MM and Kolling, GL and Glass, EM and Goldberg, JB and Papin, JA}, title = {Model-driven characterization of functional diversity of Pseudomonas aeruginosa clinical isolates with broadly representative phenotypes.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.10.08.561426}, pmid = {37873245}, abstract = {UNLABELLED: Pseudomonas aeruginosa is a leading cause of infections in immunocompromised individuals and in healthcare settings. This study aims to understand the relationships between phenotypic diversity and the functional metabolic landscape of P. aeruginosa clinical isolates. To better understand the metabolic repertoire of P. aeruginosa in infection, we deeply profiled a representative set from a library of 971 clinical P. aeruginosa isolates with corresponding patient metadata and bacterial phenotypes. 
The genotypic clustering based on whole-genome sequencing of the isolates, multi-locus sequence types, and the phenotypic clustering generated from a multi-parametric analysis were compared to each other to assess the genotype-phenotype correlation. Genome-scale metabolic network reconstructions were developed for each isolate through amendments to an existing PA14 network reconstruction. These network reconstructions show diverse metabolic functionalities and enhance the collective P. aeruginosa pangenome metabolic repertoire. Characterizing this rich set of clinical P. aeruginosa isolates allows for a deeper understanding of the genotypic and metabolic diversity of the pathogen in a clinical setting and lays a foundation for further investigation of the metabolic landscape of this pathogen and host-associated metabolic differences during infection.

IMPACT STATEMENT: Pseudomonas aeruginosa is a leading cause of infections in immunocompromised individuals and in healthcare settings. The treatment of these infections is complicated by the presence of a variety of virulence mechanisms and metabolic uniqueness among clinically relevant strains. This study is an attempt to understand the relationships between isolate phenotypic diversity and the functional metabolic landscape within a representative group of P. aeruginosa clinical isolates. Characterizing this rich set of clinical P. aeruginosa isolates allows for a deeper understanding of genotypic and metabolic diversity of the pathogen in a clinical setting and lays a foundation for further investigation of the metabolic landscape of this pathogen and host-associated metabolic differences in infection.}, } @article {pmid37868321, year = {2023}, author = {Gao, Z and Bian, J and Lu, F and Jiao, Y and He, H}, title = {Corrigendum: Triticeae crop genome biology: an endless frontier.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1280660}, doi = {10.3389/fpls.2023.1280660}, pmid = {37868321}, issn = {1664-462X}, abstract = {[This corrects the article DOI: 10.3389/fpls.2023.1222681.].}, } @article {pmid37864332, year = {2023}, author = {Liang, Y and Han, Y}, title = {Pan-genome brings opportunities to revitalize ancient crop foxtail millet.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100735}, doi = {10.1016/j.xplc.2023.100735}, pmid = {37864332}, issn = {2590-3462}, abstract = {The annual grass, foxtail millet (Setaria italica), was first domesticated ∼11,000 years ago, making it one of the most ancient crops in the world, and it was the mainstay underpinning the development of Asian farming civilization. The looming food shortage crisis aggravated by climate change threatens to make current agriculture unsustainable. 
As a C4 photosynthetic plant, foxtail millet has attracted increasing attention from the scientific and industrial farming communities because of its drought tolerance, good adaptability and nutritional properties. Foxtail millet and green foxtail (Setaria viridis) have been developed into ideal model systems for C4 crops due to their compact diploid genomes, rich genetic diversity, self-pollination, high-throughput transformation, short life cycles and ease of laboratory culture.}, } @article {pmid37858045, year = {2023}, author = {Cumsille, A and Serna-Cardona, N and González, V and Claverías, F and Undabarrena, A and Molina, V and Salvà-Serra, F and Moore, ERB and Cámara, B}, title = {Exploring the biosynthetic gene clusters in Brevibacterium: a comparative genomic analysis of diversity and distribution.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {622}, pmid = {37858045}, issn = {1471-2164}, support = {21191625//Agencia Nacional de Investigación y Desarrollo/ ; 1221264//Agencia Nacional de Investigación y Desarrollo/ ; }, abstract = {Exploring Brevibacterium strains from various ecosystems may lead to the discovery of new antibiotic-producing strains. Brevibacterium sp. H-BE7, a strain isolated from marine sediments from Northern Patagonia, Chile, had its genome sequenced to study the biosynthetic potential to produce novel natural products within the Brevibacterium genus. The genome sequences of 98 Brevibacterium strains, including strain H-BE7, were selected for a genomic analysis. A phylogenomic cladogram was generated, which divided the Brevibacterium strains into four major clades. A total of 25 strains are potentially unique new species according to Average Nucleotide Identity (ANIb) values. These strains were isolated from various environments, emphasizing the importance of exploring diverse ecosystems to discover the full diversity of Brevibacterium. 
Pangenome analysis of Brevibacterium strains revealed that only 2.5% of gene clusters are included within the core genome, and most gene clusters occur either as singletons or as cloud genes present in less than ten strains. Brevibacterium strains from various phylogenomic clades exhibit diverse BGCs. Specific groups of BGCs show clade-specific distribution patterns, such as siderophore BGCs and carotenoid-related BGCs. A group of clade IV-A Brevibacterium strains possess a clade-specific Polyketide synthase (PKS) BGCs that connects with phenazine-related BGCs. Within the PKS BGC, five genes, including the biosynthetic PKS gene, participate in the mevalonate pathway and exhibit similarities with the phenazine A BGC. However, additional core biosynthetic phenazine genes were exclusively discovered in nine Brevibacterium strains, primarily isolated from cheese. Evaluating the antibacterial activity of strain H-BE7, it exhibited antimicrobial activity against Salmonella enterica and Listeria monocytogenes. Chemical dereplication identified bioactive compounds, such as 1-methoxyphenazine in the crude extracts of strain H-BE7, which could be responsible of the observed antibacterial activity. While strain H-BE7 lacks the core phenazine biosynthetic genes, it produces 1-methoxyphenazine, indicating the presence of an unknown biosynthetic pathway for this compound. This suggests the existence of alternative biosynthetic pathways or promiscuous enzymes within H-BE7's genome.}, } @article {pmid37854939, year = {2023}, author = {Srivastava, N and Shiburaj, S and Khare, SK}, title = {Pan-genomic comparison of a potential solvent-tolerant alkaline protease-producing Exiguobacterium sp. TBG-PICH-001 isolated from a marine habitat.}, journal = {3 Biotech}, volume = {13}, number = {11}, pages = {371}, pmid = {37854939}, issn = {2190-572X}, abstract = {UNLABELLED: The identification and applicability of bacteria are inconclusive until comprehended with genomic repositories. 
Our isolate, Exiguobacterium sp. TBG-PICH-001 exhibited excellent halo- and organic solvent tolerance with simultaneous production of alkaline protease/s (0.512 IU/mL). The crude protease (1 IU) showed a 43.57% degradation of whey protein. The bulk proteins in the whey were hydrolyzed to smaller peptides which were evident in the SDS-PAGE profile. With such characteristics, the isolate became interesting for its genomic studies. The TBG-PICH-001 genome was found to be 3.14 Mb in size with 17 contigs and 47.33% GC content. The genome showed 3176 coding genes, and 2699 genes were characterized for their functionality. The Next-Generation-Sequencing of the genome identified only the isolate's genus; hence we attempted to delineate its species position. The genomes of the isolate and other representative Exiguobacterium spp. were compared based on orthologous genes (Orthovenn2 server). A pan-genomic analysis revealed the match of TBG-PICH-001 with 15 uncharacterized Exiguobacterium genomes at the species level. All these collectively matched with Exiguobacterium indicum, and the results were reconfirmed through phylogenetic studies. Further, the Exiguobacterium indicum genomes were engaged for homology studies rendering 11 classes of protease genes. Two putative proteases (Zinc metalloprotease and Serine protease) obtained from homology were checked for PCR amplification using genomic DNA of TBG-PICH-001 and other Exiguobacterium genomes. The results showed amplification only in the Exiguobacterium indicum genome. These protease genes, after sequencing, were matched with the TBG-PICH-001 genome. Their presence in its whole genome experimentally validated the study.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-023-03796-5.}, } @article {pmid36945625, year = {2023}, author = {Hadjifrangiskou, M and Reasoner, S and Flores, V and Van Horn, G and Morales, G and Peard, L and Abelson, B and Manuel, C and Lee, J and Baker, B and Williams, T and Schmitz, J and Clayton, D}, title = {Defining the Infant Male Urobiome and Moving Towards Mechanisms in Urobiome Research.}, journal = {Research square}, volume = {}, number = {}, pages = {}, pmid = {36945625}, support = {F30 AI169748/AI/NIAID NIH HHS/United States ; }, abstract = {The urinary bladder harbors a community of microbes termed the urobiome, which remains understudied. In this study, we present the urobiome of healthy infant males from samples collected by transurethral catheterization. Using a combination of extended culture and amplicon sequencing, we identify several common bacterial genera that can be further investigated for their effects on urinary health across the lifespan. Many genera were shared between all samples suggesting a consistent urobiome composition among this cohort. We note that, for this cohort, early life exposures including mode of birth (vaginal vs. Caesarean section), or prior antibiotic exposure did not influence urobiome composition. In addition, we report the isolation of culturable bacteria from the bladders of these infant males, including Actinotignum schaalii, a bacterial species that has been associated with urinary tract infection in older male adults. Herein, we isolate and sequence 9 distinct strains of A. schaalii enhancing the genomic knowledge surrounding this species and opening avenues for delineating the microbiology of this urobiome constituent. 
Furthermore, we present a framework for using the combination of culture-dependent and sequencing methodologies for uncovering mechanisms in the urobiome.}, } @article {pmid37847672, year = {2023}, author = {Connor, CH and Zucoloto, AZ and Munnoch, JT and Yu, IL and Corander, J and Hoskisson, PA and McDonald, B and McNally, A}, title = {Multidrug-resistant E. coli encoding high genetic diversity in carbohydrate metabolism genes displace commensal E. coli from the intestinal tract.}, journal = {PLoS biology}, volume = {21}, number = {10}, pages = {e3002329}, doi = {10.1371/journal.pbio.3002329}, pmid = {37847672}, issn = {1545-7885}, abstract = {Extra-intestinal pathogenic Escherichia coli (ExPEC) can cause a variety of infections outside of the intestine and are a major causative agent of urinary tract infections. Treatment of these infections is increasingly frustrated by antimicrobial resistance (AMR) diminishing the number of effective therapies available to clinicians. Incidence of multidrug resistance (MDR) is not uniform across the phylogenetic spectrum of E. coli. Instead, AMR is concentrated in select lineages, such as ST131, which are MDR pandemic clones that have spread AMR globally. Using a gnotobiotic mouse model, we demonstrate that an MDR E. coli ST131 is capable of out-competing and displacing non-MDR E. coli from the gut in vivo. This is achieved in the absence of antibiotic treatment mediating a selective advantage. In mice colonised with non-MDR E. coli strains, challenge with MDR E. coli either by oral gavage or co-housing with MDR E. coli colonised mice results in displacement and dominant intestinal colonisation by MDR E. coli ST131. To investigate the genetic basis of this superior gut colonisation ability by MDR E. coli, we assayed the metabolic capabilities of our strains using a Biolog phenotypic microarray revealing altered carbon metabolism. Functional pangenomic analysis of 19,571 E. 
coli genomes revealed that carriage of AMR genes is associated with increased diversity in carbohydrate metabolism genes. The data presented here demonstrate that independent of antibiotic selective pressures, MDR E. coli display a competitive advantage to colonise the mammalian gut and points to a vital role of metabolism in the evolution and success of MDR lineages of E. coli via carriage and spread.}, } @article {pmid37847157, year = {2023}, author = {Zhang, Z and Zhao, J and Li, J and Yao, J and Wang, B and Ma, Y and Li, N and Wang, H and Wang, T and Liu, B and Gong, L}, title = {Evolutionary trajectory of organelle-derived nuclear DNAs in the Triticum/Aegilops complex species.}, journal = {Plant physiology}, volume = {}, number = {}, pages = {}, doi = {10.1093/plphys/kiad552}, pmid = {37847157}, issn = {1532-2548}, abstract = {Organelle-derived nuclear DNAs, nuclear plastid DNAs (NUPTs) and nuclear mitochondrial DNAs (NUMTs) have been identified in plants. Most, if not all, genes residing in NUPTs/NUMTs (NUPGs/NUMGs) are known to be inactivated and pseudogenized. However, the role of epigenetic control in silencing NUPGs/NUMGs and the dynamic evolution of NUPTs/NUMTs with respect to organismal phylogeny remain barely explored. Based on the available nuclear and organellar genomic resources of wheat (genus Triticum) and goat grass (genus Aegilops) within Triticum/Aegilops complex species, we investigated the evolutionary fates of NUPTs/NUMTs in terms of their epigenetic silencing and their dynamic occurrence rates in the nuclear diploid genomes and allopolyploid subgenomes. NUPTs and NUMTs possessed similar genomic atlas, including (i) predominantly located in intergenic regions and preferential integration to gene regulation regions and (ii) generating sequence variations in the nuclear genome. 
Unlike nuclear indigenous genes, the alien NUPGs/NUMGs were associated with repressive epigenetic signals, namely high levels of DNA methylation and low levels of active histone modifications. Phylogenomic analyses suggested that the species-specific and gradual accumulation of NUPTs/NUMTs accompanied the speciation processes. Moreover, based on further pan-genomic analyses, we found significant subgenomic asymmetry in the NUPT/NUMT occurrence, which accumulated during allopolyploid wheat evolution. Our findings provide insight into the dynamic evolutionary fates of organelle-derived nuclear DNA in plants.}, } @article {pmid37846049, year = {2023}, author = {Aylward, AJ and Petrus, S and Mamerto, A and Hartwick, NT and Michael, TP}, title = {PanKmer: k-mer based and reference-free pangenome analysis.}, journal = {Bioinformatics (Oxford, England)}, volume = {}, number = {}, pages = {}, doi = {10.1093/bioinformatics/btad621}, pmid = {37846049}, issn = {1367-4811}, abstract = {SUMMARY: Pangenomes are replacing single reference genomes as the definitive representation of DNA sequence within a species or clade. Pangenome analysis predominantly leverages graph-based methods that require computationally intensive multiple genome alignments, do not scale to highly complex eukaryotic genomes, limit their scope to identifying structural variants (SVs), or incur bias by relying on a reference genome. Here, we present PanKmer, a toolkit designed for reference-free analysis of pangenome datasets consisting of dozens to thousands of individual genomes. PanKmer decomposes a set of input genomes into a table of observed k-mers and their presence-absence values in each genome. These are stored in an efficient k-mer index data format that encodes SNPs, INDELs, and SVs. It also includes functions for downstream analysis of the k-mer index, such as calculating sequence similarity statistics between individuals at whole-genome or local scales. 
For example, k-mers can be "anchored" in any individual genome to quantify sequence variability or conservation at a specific locus. This facilitates workflows with various biological applications, e.g. identifying cases of hybridization between plant species. PanKmer provides researchers with a valuable and convenient means to explore the full scope of genetic variation in a population, without reference bias.

PanKmer is implemented as a Python package with components written in Rust, released under a BSD license. The source code is available from the Python Package Index (PyPI) at https://pypi.org/project/pankmer/ as well as Gitlab at https://gitlab.com/salk-tm/pankmer. Full documentation is available at https://salk-tm.gitlab.io/pankmer/.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid37841331, year = {2023}, author = {Asif, M and Li-Qun, Z and Zeng, Q and Atiq, M and Ahmad, K and Tariq, A and Al-Ansari, N and Blom, J and Fenske, L and Alodaini, HA and Hatamleh, AA}, title = {Comprehensive genomic analysis of Bacillus paralicheniformis strain BP9, pan-genomic and genetic basis of biocontrol mechanism.}, journal = {Computational and structural biotechnology journal}, volume = {21}, number = {}, pages = {4647-4662}, pmid = {37841331}, issn = {2001-0370}, abstract = {Many Bacillus species are essential antibacterial agents, but their antibiosis potential still needs to be elucidated to its full extent. Here, we isolated a soil bacterium, BP9, which has significant antibiosis activity against fungal and bacterial pathogens. BP9 improved the growth of wheat seedlings via active colonization and demonstrated effective biofilm and swarming activity. BP9 sequenced genome contains 4282 genes with a mean G-C content of 45.94% of the whole genome. A single copy concatenated 802 core genes of 28 genomes, and their calculated average nucleotide identity (ANI) discriminated the strain BP9 from Bacillus licheniformis and classified it as Bacillus paralicheniformis. Furthermore, a comparative pan-genome analysis of 40 B. paralicheniformis strains suggested that the genetic repertoire of BP9 belongs to open-type genome species. A comparative analysis of a pan-genome dataset using the Kyoto Encyclopedia of Genes and Genomes (KEGG) and Cluster of Orthologous Gene groups (COG) revealed the diversity of secondary metabolic pathways, where BP9 distinguishes itself by exhibiting a greater prevalence of loci associated with the metabolism and transportation of organic and inorganic substances, carbohydrate and amino acid for effective inhabitation in diverse environments. 
The primary secondary metabolites and their genes involved in synthesizing bacillibactin, fengycin, bacitracin, and lantibiotics were identified as acquired through a recent Horizontal gene transfer (HGT) event, which contributes to a significant part of the strain's antimicrobial potential. Finally, we report some genes essential for plant-host interaction identified in BP9, which reduce spore germination and virulence of multiple fungal and bacterial species. The effective colonization, diverse predicted metabolic pathways and secondary metabolites (antibiotics) suggest testing the suitability of strain BP9 as a potential bio-preparation in agricultural fields.}, } @article {pmid37835381, year = {2023}, author = {Giguère, A and Raymond-Bouchard, I and Collin, V and Claveau, JS and Hébert, J and LeBlanc, R}, title = {Optical Genome Mapping Reveals the Complex Genetic Landscape of Myeloma.}, journal = {Cancers}, volume = {15}, number = {19}, pages = {}, doi = {10.3390/cancers15194687}, pmid = {37835381}, issn = {2072-6694}, support = {N/A//Canada Research Chairs/ ; N/A//Fonds de Recherche du Québec - Santé/ ; }, abstract = {Fluorescence in situ hybridization (FISH) on enriched CD138 plasma cells is the standard method for identification of clinically relevant genetic abnormalities in multiple myeloma. However, FISH is a targeted analysis that can be challenging due to the genetic complexity of myeloma. The aim of this study was to evaluate the potential of optical genome mapping (OGM) to detect clinically significant cytogenetic abnormalities in myeloma and to provide larger pangenomic information. OGM and FISH analyses were performed on CD138-purified cells of 20 myeloma patients. 
OGM successfully detected structural variants (SVs) (IGH and MYC rearrangements), copy number variants (CNVs) (17p/TP53 deletion, 1p deletion and 1q gain/amplification) and aneuploidy (gains of odd-numbered chromosomes, monosomy 13) classically expected with myeloma and led to a 30% increase in prognosis yield at our institution when compared to FISH. Despite challenges in the interpretation of OGM calls for CNV and aneuploidy losses in non-diploid genomes, OGM has the potential to replace FISH as the standard of care analysis in clinical settings and to efficiently change how we identify prognostic and predictive markers for therapies in the future. To our knowledge, this is the first study highlighting the feasibility and clinical utility of OGM in myeloma.}, } @article {pmid37832344, year = {2023}, author = {Latifi, T and Jalilvand, S and Golsaz-Shirazi, F and Arashkia, A and Kachooei, A and Afchangi, A and Zafarian, S and Roohvand, F and Shoja, Z}, title = {Characterization and immunogenicity of a novel chimeric hepatitis B core-virus like particles (cVLPs) carrying rotavirus VP8*protein in mice model.}, journal = {Virology}, volume = {588}, number = {}, pages = {109903}, doi = {10.1016/j.virol.2023.109903}, pmid = {37832344}, issn = {1096-0341}, abstract = {Given the efficacy and safety issues of the WHO for approved/prequalified live attenuated rotavirus (RV) vaccines, studies on alternative non-replicating modals and proper RV antigens are actively undertaken. Herein, we report the novel chimeric hepatitis B core-virus like particles (VLPs) carrying RV VP8*26-231 protein of a P [8] strain (cVLPVP8*), as a parenteral VLP RV vaccine candidate. SDS-PAGE and Western blotting analyses indicated the expected size of the E. coli-derived HBc-VP8* protein that self-assembled to cVLPVP8* particles. Immunization in mice indicated development of higher levels of IgG and IgA as well as higher IgG1/IgG2a ratios by cVLPVP8* vaccination compared to the VP8* alone. 
Assessment of neutralizing antibodies (nAbs) indicated development of heterotypic nAbs with cross-reactivity to a heterotypic RV strain by cVLPVP8* immunization compared to VP8* alone. The observed anti-VP8* cross-reactivity might indicate the possibility of developing a Pan-genomic RVA vaccine based on the cVLPVP8* formulation that deserves further challenge studies.}, } @article {pmid37829450, year = {2023}, author = {Jiang, ZM and Deng, Y and Han, XF and Su, J and Wang, H and Yu, LY and Zhang, YQ}, title = {Corrigendum: Geminicoccus flavidas sp. nov. and Geminicoccus harenae sp. nov., two IAA-producing novel rare bacterial species inhabiting desert biological soil crusts.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1285950}, doi = {10.3389/fmicb.2023.1285950}, pmid = {37829450}, issn = {1664-302X}, abstract = {[This corrects the article DOI: 10.3389/fmicb.2022.1034816.].}, } @article {pmid37823548, year = {2023}, author = {Baby, V and Ambroset, C and Gaurivaud, P and Falquet, L and Boury, C and Guichoux, E and Jores, J and Lartigue, C and Tardy, F and Sirand-Pugnet, P}, title = {Comparative genomics of Mycoplasma feriruminatoris, a fast-growing pathogen of wild Caprinae.}, journal = {Microbial genomics}, volume = {9}, number = {10}, pages = {}, doi = {10.1099/mgen.0.001112}, pmid = {37823548}, issn = {2057-5858}, abstract = {Mycoplasma feriruminatoris is a fast-growing Mycoplasma species isolated from wild Caprinae and first described in 2013. M. feriruminatoris isolates have been associated with arthritis, kerato conjunctivitis, pneumonia and septicemia, but were also recovered from apparently healthy animals. To better understand what defines this species, we performed a genomic survey on 14 strains collected from free-ranging or zoo-housed animals between 1987 and 2017, mostly in Europe. The average chromosome size of the M. feriruminatoris strains was 1,040±0,024 kbp, with 24 % G+C and 852±31 CDS. 
The core genome and pan-genome of the M. feriruminatoris species contained 628 and 1312 protein families, respectively. The M. feriruminatoris strains displayed a relatively closed pan-genome, with many features and putative virulence factors shared with species from the M. mycoides cluster, including the MIB-MIP Ig cleavage system, a repertoire of DUF285 surface proteins and a complete biosynthetic pathway for galactan. M. feriruminatoris genomes were found to be mostly syntenic, although repertoires of mobile genetic elements, including Mycoplasma Integrative and Conjugative Elements, insertion sequences, and a single plasmid varied. Phylogenetic- and gene content analyses confirmed that M. feriruminatoris was closer to the M. mycoides cluster than to the ruminant species M. yeatsii and M. putrefaciens. Ancestral genome reconstruction showed that the emergence of the M. feriruminatoris species was associated with the gain of 17 gene families, some of which encode defence enzymes and surface proteins, and the loss of 25 others, some of which are involved in sugar transport and metabolism. This comparative study suggests that the M. mycoides cluster could be extended to include M. feriruminatoris. We also find evidence that the specific organization and structure of the DnaA boxes around the oriC of M. 
feriruminatoris may contribute to drive the remarkable fast growth of this minimal bacterium.}, } @article {pmid37817747, year = {2023}, author = {Beard, S and Moya-Beltrán, A and Silva-García, D and Valenzuela, C and Pérez-Acle, T and Loyola, A and Quatrini, R}, title = {Pangenome-level analysis of nucleoid-associated proteins in the Acidithiobacillia class: insights into their functional roles in mobile genetic elements biology.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1271138}, pmid = {37817747}, issn = {1664-302X}, abstract = {Mobile genetic elements (MGEs) are relevant agents in bacterial adaptation and evolutionary diversification. Stable appropriation of these DNA elements depends on host factors, among which are the nucleoid-associated proteins (NAPs). NAPs are highly abundant proteins that bind and bend DNA, altering its topology and folding, thus affecting all known cellular DNA processes from replication to expression. Even though NAP coding genes are found in most prokaryotic genomes, their functions in host chromosome biology and xenogeneic silencing are only known for a few NAP families. Less is known about the occurrence, abundance, and roles of MGE-encoded NAPs in foreign elements establishment and mobility. In this study, we used a combination of comparative genomics and phylogenetic strategies to gain insights into the diversity, distribution, and functional roles of NAPs within the class Acidithiobacillia with a special focus on their role in MGE biology. Acidithiobacillia class members are aerobic, chemolithoautotrophic, acidophilic sulfur-oxidizers, encompassing substantial genotypic diversity attributable to MGEs. Our search for NAP protein families (PFs) in more than 90 genomes of the different species that conform the class, revealed the presence of 1,197 proteins pertaining to 12 different NAP families, with differential occurrence and conservation across species. 
Pangenome-level analysis revealed 6 core NAP PFs that were highly conserved across the class, some of which also existed as variant forms of scattered occurrence, in addition to NAPs of taxa-restricted distribution. Core NAPs identified are reckoned as essential based on the conservation of genomic context and phylogenetic signals. In turn, various highly diversified NAPs pertaining to the flexible gene complement of the class, were found to be encoded in known plasmids or, larger integrated MGEs or, present in genomic loci associated with MGE-hallmark genes, pointing to their role in the stabilization/maintenance of these elements in strains and species with larger genomes. Both core and flexible NAPs identified proved valuable as markers, the former accurately recapitulating the phylogeny of the class, and the latter, as seed in the bioinformatic identification of novel episomal and integrated mobile elements.}, } @article {pmid37811910, year = {2023}, author = {Le, VV and Ko, SR and Oh, HM and Ahn, CY}, title = {Genomic Insights into Paucibacter aquatile DH15, a Cyanobactericidal Bacterium, and Comparative Genomics of the Genus Paucibacter.}, journal = {Journal of microbiology and biotechnology}, volume = {33}, number = {12}, pages = {1-10}, doi = {10.4014/jmb.2307.07008}, pmid = {37811910}, issn = {1738-8872}, abstract = {Microcystis blooms threaten ecosystem function and cause substantial economic losses. Microorganism-based methods, mainly using cyanobactericidal bacteria, are considered one of the most ecologically sound methods to control Microcystis blooms. This study focused on gaining genomic insights into Paucibacter aquatile DH15 that exhibited excellent cyanobactericidal effects against Microcystis. Additionally, a pan-genome analysis of the genus Paucibacter was conducted to enhance our understanding of the ecophysiological significance of this genus. 
Based on phylogenomic analyses, strain DH15 was classified as a member of the species Paucibacter aquatile. The genome analysis supported that strain DH15 can effectively destroy Microcystis, possibly due to the specific genes involved in the flagellar synthesis, cell wall degradation, and the production of cyanobactericidal compounds. The pan-genome analysis revealed the diversity and adaptability of the genus Paucibacter, highlighting its potential to absorb external genetic elements. Paucibacter species were anticipated to play a vital role in the ecosystem by potentially providing essential nutrients, such as vitamins B7, B12, and heme, to auxotrophic microbial groups. Overall, our findings contribute to understanding the molecular mechanisms underlying the action of cyanobactericidal bacteria against Microcystis and shed light on the ecological significance of the genus Paucibacter.}, } @article {pmid37811774, year = {2023}, author = {Ishaq, Z and Zaheer, T and Waseem, M and Shahwar Awan, H and Ullah, N and AlAsmari, AF and AlAsmari, F and Ali, A}, title = {Immunoinformatics aided designing of a next generation poly-epitope vaccine against uropathogenic Escherichia coli to combat urinary tract infections.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-21}, doi = {10.1080/07391102.2023.2266018}, pmid = {37811774}, issn = {1538-0254}, abstract = {Urinary tract infections (UTIs) are the second most prevalent bacterial infections and uropathogenic Escherichia coli (UPEC) stands among the primary causative agents of UTIs. The usage of antibiotics is the routine therapy being used in various countries to treat UTIs but becoming ineffective because of increasing antibiotic resistance among UPEC strains. Thus, there must be the development of some alternative treatment strategies such as vaccine development against UPEC. 
In the following study, pan-genomics along with reverse vaccinology approaches is used under the framework of bioinformatics for the identification of core putative vaccine candidates, employing 307 UPEC genomes (complete and draft), available publicly. A total of nine T-cell epitopes (derived from B-cells) of both MHC classes (I and II), were prioritized among three potential protein candidates. These epitopes were then docked together by using linkers (GPGPG and AAY) and an adjuvant (Cholera Toxin B) to form a poly-valent vaccine construct. The chimeric vaccine construct was undergone by molecular modelling, further refinement and energy minimization. We predicted positive results of the vaccine construct in immune simulations with significantly high levels of immune cells. The protein-protein docking analysis of vaccine construct with toll-like receptors predicted efficient binding, which was further validated by molecular dynamics simulation of vaccine construct with TLR-2 and TLR-4 at 120 ns, resulting in stable complexes' conformation throughout the simulation run. Overall, the vaccine construct demonstrated positive antigenic response. In future, this chimeric vaccine construct or the identified epitopes could be experimentally validated for the development of UPEC vaccines against UTIs. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37808295, year = {2023}, author = {Wang, Z and Liu, Y and Liu, P and Jian, Z and Yan, Q and Tang, B and Yang, A and Liu, W}, title = {Genomic and clinical characterization of Klebsiella pneumoniae carrying the pks island.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1189120}, pmid = {37808295}, issn = {1664-302X}, abstract = {BACKGROUND: The pks island and its production of the bacterial secondary metabolite genotoxin, colibactin, have attracted increasing attention. 
However, genomic articles focusing on pks islands in Klebsiella pneumoniae, as well as comparative genomic studies of mobile genetic elements, such as prophages, plasmids, and insertion sequences, are lacking. In this study, a large-scale analysis was conducted to understand the prevalence and evolution of pks islands, differences in mobile genetic elements between pks-negative and pks-positive K. pneumoniae, and clinical characteristics of infection caused by pks-positive K. pneumoniae.

METHODS: The genomes of 2,709 K. pneumoniae were downloaded from public databases, among which, 1,422 were from NCBI and 1,287 were from the China National GeneBank DataBase (CNGBdb). Screening for virulence and resistance genes, phylogenetic tree construction, and pan-genome analysis were performed. Differences in mobile genetic elements between pks-positive and pks-negative strains were compared. The clinical characteristics of 157 pks-positive and 157 pks-negative K. pneumoniae infected patients were investigated.

RESULTS: Of 2,709 K. pneumoniae genomes, 245 pks-positive genomes were screened. The four siderophores, type VI secretion system, and nutritional factor genes were present in at least 77.9% (191/245), 66.9% (164/245), and 63.3% (155/245) of pks-positive strains, respectively. The number and fragment length of prophage were lower in pks-positive strains than in pks-negative strains (p < 0.05). The prevalence of the IS6 family was higher in pks-negative strains than in pks-positive strains, and the prevalence of multiple plasmid replicon types differed between the pks-positive and pks-negative strains (p < 0.05). The detection rate of pks-positive K. pneumoniae in abscess samples was higher than that of pks-negative K. pneumoniae (p < 0.05).

CONCLUSION: The pks-positive strains had abundant virulence genes. There were differences in the distribution of mobile genetic elements between pks-positive and pks-negative isolates. Further analysis of the evolutionary pattern of pks island and epidemiological surveillance in different populations are needed.}, } @article {pmid37806426, year = {2023}, author = {Rosani, U and Sollitto, M and Fogal, N and Salata, C}, title = {Comparative analysis of Presence-Absence gene Variations in five hard tick species: impact and functional considerations.}, journal = {International journal for parasitology}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.ijpara.2023.08.004}, pmid = {37806426}, issn = {1879-0135}, abstract = {Tick species are vectors of harmful human and animal diseases, and their expansion is raising concerns under the global environmental changes' scenario. Ticks host and transmit bacteria, protozoa and viruses, making the understanding of host-pathogen molecular pathways critical to development of effective disease control strategies. Despite the considerable sizes and repeat contents of tick genomes, individual tick genomics is perhaps the most effective approach to reveal genotypic traits of interest. Presence-Absence gene Variations (PAVs) can contribute to individual differences within species, with dispensable genes carried by subsets of individuals possibly underpinning functional significance at individual or population-levels. We exploited 350 resequencing datasets of Dermacentor silvarum, Haemaphysalis longicornis, Ixodes persulcatus, Rhipicephalus microplus and Rhipicephalus sanguineus hard tick specimens to reveal the extension of PAV and the conservation of dispensable genes among individuals and, comparatively, between species. Overall, we traced 550-3,346 dispensable genes per species and were able to reconstruct 5.3-7 Mb of genomic regions not included in the respective reference genomes, as part of the tick pangenomes. 
Both dispensable genes and de novo predicted genes indicated that PAVs preferentially impacted mobile genetic elements in these tick species.}, } @article {pmid37804413, year = {2023}, author = {Bouznada, K and Belaouni, HA and Meklat, A}, title = {Genome-based reclassification of Kitasatospora niigatensis as a later heterotypic synonym of Kitasatospora cineracea Tajima et al. (2001).}, journal = {Antonie van Leeuwenhoek}, volume = {}, number = {}, pages = {}, pmid = {37804413}, issn = {1572-9699}, abstract = {The present study used genome-based approaches to investigate the taxonomic relationship between Kitasatospora cineracea DSM 44780[T] and Kitasatospora niigatensis DSM 44781[T], two species that were previously described by Tajima et al. (Int J Syst Evol Microbiol 51:1765-1771, 2001). The digital DNA-DNA hybridization (dDDH), average amino acid identity (AAI), and average nucleotide identity (ANI) values between the genomes of the two type strains were 90.3, 98.7, and 99.1%, respectively. These values exceeded the established thresholds of 70% (dDDH) and 95-96% (ANI and AAI) for bacterial species delineation, suggesting that K. cineracea and K. niigatensis should share the same taxonomic position. Furthermore, our analysis using the 'Bacterial Pan Genome Analysis' (BPGA) pipeline and the Maximum Likelihood core-genes tree inferred using FastTree2 consistently demonstrated that K. cineracea DSM 44780[T] and K. niigatensis DSM 44781[T] are closely related, as indicated by the clustering of these strains in the core-genes phylogenomic tree. Based on these findings, we propose that K. niigatensis should be considered a later heterotypic synonym of K. 
cineracea.}, } @article {pmid37803826, year = {2023}, author = {Niu, Y and Liu, Q and He, Z and Raman, R and Wang, H and Long, X and Qin, H and Raman, H and Parkin, IAP and Bancroft, I and Zou, J}, title = {A Brassica carinata pan-genome platform for Brassica crop improvement.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100725}, doi = {10.1016/j.xplc.2023.100725}, pmid = {37803826}, issn = {2590-3462}, } @article {pmid37803772, year = {2023}, author = {You, L and Lv, R and Jin, H and Ma, T and Zhao, Z and Kwok, LY and Sun, Z}, title = {A large-scale comparative genomics study reveals niche-driven and within-sample intra-species functional diversification in Lacticaseibacillus rhamnosus.}, journal = {Food research international (Ottawa, Ont.)}, volume = {173}, number = {Pt 2}, pages = {113446}, doi = {10.1016/j.foodres.2023.113446}, pmid = {37803772}, issn = {1873-7145}, abstract = {Lacticaseibacillus rhamnosus (L. rhamnosus) is widely recognized as a probiotic species, and it exists in a variety of environments including host gut and dairy products. This work aimed at conducting a large-scale comparative genomics analysis of 384 L. rhamnosus genomes (257 whole-sequence or metagenomic-assembled genomes from gut-associated isolates [122 and 135 retrieved from the UHGG and NCBI databases, respectively] and 127 genomes from dairy isolates [34 from the NCBI database; 93 isolated from a cheese sample and sequenced here]). Our results showed that L. rhamnosus had a large and open pan-genome (15,253 pan-genes identified from all 384 genomes; 15,028 pan-genes if the 93 cheese-originated isolates were excluded). The core-gene phylogenetic tree constructed from the 384 L. rhamnosus genomes comprised five phylogenetic branches, with a random distribution of dairy and gut-associated isolates/genomes across the tree. 
No significant difference was identified in the overall profile of metabolism-related genes between dairy and gut-associated genomes; however, notably, the gut-associated strains/isolates contained more genes coding for specific metabolic pathways and carbohydrate-active enzymes, e.g., lacto-N-biosidase (EC 3.2.1.140; GT20) and lacto-N-biose phosphorylase/galacto-N-biose phosphorylase (EC 2.4.1.211; GH112). Further, we found that there was obvious intra-species diversification of the 93 cheese-originated L. rhamnosus isolates, forming three clades (Clades A, B, and C) in the reconstructed core-gene phylogenetic tree. There were numerous single nucleotide variations (over 10,000) across the three clades. Moreover, significant differences were observed in the content of metabolism-related genes across clades (p < 0.05, Adonis test), characterized by the enrichment in glycoside hydrolases in Clade C and the possession of unique metabolic pathways in each clade. These results implicated genomics/functional diversification of L. rhamnosus in a single food matrix and niche-driven adaptive evolution of isolates from dairy and host gut-associated origins. Our study shed insights into the selection of candidate strains for food industry applications.}, } @article {pmid37802986, year = {2023}, author = {Kang, M and Wu, H and Liu, H and Liu, W and Zhu, M and Han, Y and Liu, W and Chen, C and Song, Y and Tan, L and Yin, K and Zhao, Y and Yan, Z and Lou, S and Zan, Y and Liu, J}, title = {The pan-genome and local adaptation of Arabidopsis thaliana.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {6259}, pmid = {37802986}, issn = {2041-1723}, abstract = {Arabidopsis thaliana serves as a model species for investigating various aspects of plant biology. However, the contribution of genomic structural variations (SVs) and their associate genes to the local adaptation of this widely distribute species remains unclear. 
Here, we de novo assemble chromosome-level genomes of 32 A. thaliana ecotypes and determine that variable genes expand the gene pool in different ecotypes and thus assist local adaptation. We develop a graph-based pan-genome and identify 61,332 SVs that overlap with 18,883 genes, some of which are highly involved in ecological adaptation of this species. For instance, we observe a specific 332 bp insertion in the promoter region of the HPCA1 gene in the Tibet-0 ecotype that enhances gene expression, thereby promotes adaptation to alpine environments. These findings augment our understanding of the molecular mechanisms underlying the local adaptation of A. thaliana across diverse habitats.}, } @article {pmid37801223, year = {2023}, author = {Dias, RS and Kremer, FS and da Costa de Avila, LF}, title = {In silico prospection of Lactobacillus acidophilus strains with potential probiotic activity.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {}, number = {}, pages = {}, pmid = {37801223}, issn = {1678-4405}, abstract = {Lactic acid bacteria (LAB) are fermentative microorganisms and perform different roles in biotechnological processes, mainly in the food and pharmaceutical industries. Among the LAB, Lactobacillus acidophilus is a species that deserves to be highlighted for being used both in prophylaxis and in the treatment of pathologies. Most of the metabolites produced by this species are linked to the inhibition of pathogens. In this study, we utilized a pangenomic and metabolic annotation analysis using Roary and BlastKOALA, ML-based probiotic activity prediction with iProbiotic and whole-genome similarity using ANI to identify strains of L. acidophilus with potential probiotic activity. According to the results in BlastKOALA and iProbiotics, L. acidophilus NCTC 13721 had the greatest potential among the 64 strains tested, both in terms of its ability to be a Lactobacillus spp. 
probiotic, when in the amount of genes involved in the metabolism of organic acids and quorum sensing. In addition, DSM 20079 proved to be promising for prospecting new probiotic Lactobacillus from BlastKOALA analyses, as they presented similar results in the number of genes involved in the production of lactic acid, acetic acid, hydrogen peroxide, except for quorum sensing where the NCTC 13721 strain had 14 more genes. L. acidophilus NCTC 13721 and L. acidophilus La-5 strains showed greater ability to be Lactobacillus spp. probiotic capacity, showing 84.8% and 51.9% capacity in the iProbiotics tool, respectively. When analyzed in ANI, none of the evaluated strains showed genomic similarity with NCTC 13721. In contrast, the DSM 20079 strain showed genomic similarity with all evaluated strains except NCTC 13721. Furthermore, eight strains with characteristics with approximately 100% genomic similarity to La-5 were listed: S20_1, LA-5, FSI4, APC2845, LA-G80-111, DS1_1A, LA1, and BCRC 14065. Therefore, according to the findings in iProbiotics and BlastKoala, among the 64 strains evaluated, NCTC 13721 is the most promising strain to be used for future in vitro studies.}, } @article {pmid37799143, year = {2023}, author = {Fatima, K and Sadaqat, M and Azeem, F and Rao, MJ and Albekairi, NA and Alshammari, A and Tahir Ul Qamar, M}, title = {Integrated omics and machine learning-assisted profiling of cysteine-rich-receptor-like kinases from three peanut spp. revealed their role in multiple stresses.}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1252020}, pmid = {37799143}, issn = {1664-8021}, abstract = {Arachis hypogaea (peanut) is a leading oil and protein-providing crop with a major food source in many countries. It is mostly grown in tropical regions and is largely affected by abiotic and biotic stresses. 
Cysteine-rich receptor-like kinases (CRKs) is a family of transmembrane proteins that play important roles in regulating stress-signaling and defense mechanisms, enabling plants to tolerate stress conditions. However, almost no information is available regarding this gene family in Arachis hypogaea and its progenitors. This study conducts a pangenome-wide investigation of A. hypogaea and its two progenitors, A. duranensis and A. ipaensis CRK genes (AhCRKs, AdCRKs, and AiCRKs). The gene structure, conserved motif patterns, phylogenetic history, chromosomal distribution, and duplication were studied in detail, showing the intraspecies structural conservation and evolutionary patterns. Promoter cis-elements, protein-protein interactions, GO enrichment, and miRNA targets were also predicted, showing their potential functional conservation. Their expression in salt and drought stresses was also comprehensively studied. The CRKs identified were divided into three groups, phylogenetically. The expansion of this gene family in peanuts was caused by both types of duplication: tandem and segmental. Furthermore, positive as well as negative selection pressure directed the duplication process. The peanut CRK genes were also enriched in hormones, light, development, and stress-related elements. MicroRNA (miRNA) also targeted the AhCRK genes, which suggests the regulatory association of miRNAs in the expression of these genes. Transcriptome datasets showed that AhCRKs have varying expression levels under different abiotic stress conditions. Furthermore, the multi-stress responsiveness of the AhCRK genes was evaluated using a machine learning-based method, Random Forest (RF) classifier. The 3D structures of AhCRKs were also predicted. 
Our study can be utilized in developing a detailed understanding of the stress regulatory mechanisms of the CRK gene family in peanuts and its further studies to improve the genetic makeup of peanuts to thrive better under stress conditions.}, } @article {pmid37798879, year = {2023}, author = {Miao, H and Wang, L and Qu, L and Liu, H and Sun, Y and Le, M and Wang, Q and Wei, S and Zheng, Y and Lin, W and Duan, Y and Cao, H and Xiong, S and Wang, X and Wei, L and Li, C and Ma, Q and Ju, M and Zhao, R and Li, G and Mu, C and Tian, Q and Mei, H and Zhang, T and Gao, T and Zhang, H}, title = {Genomic evolution and insights into agronomic trait innovations of Sesamum Species.}, journal = {Plant communications}, volume = {}, number = {}, pages = {100729}, doi = {10.1016/j.xplc.2023.100729}, pmid = {37798879}, issn = {2590-3462}, abstract = {Sesame is an ancient oilseed crop with a high oil content and quality. However, the evolutionary history and genetic mechanisms of the valuable agronomic traits remain unclear. Herein, we reported chromosome-scale genomes for the cultivated and six wild Sesamum species, representing all three karyotypes within this genus. Karyotyping and genome-based phylogenic analysis found the evolution route of Sesamum species from n = 13 to n = 16 and revealed that allotetraploidization occurred in wild species S. radiatum. Moreover, the early divergence and ancient phylogenic position of the Sesamum genus (48.5-19.7 million years ago) was observed within eudicots during the Tertiary period. Pan-genome analysis further revealed 9,164 core gene families in the seven Sesamum species. These families were significantly enriched in various metabolic pathways, including fatty acid (FA) metabolism and FA biosynthesis. Structural variations in SiPT1 and SiDT1 within the PEBP gene family led to the genomic evolution of the plant architecture and inflorescence development phenotypes in Sesamum. 
A genome-wide association study (GWAS) of the interspecific population and comparative genome identified a long terminal repeat insertion in wild S. angustifolium and sequence deletion in cultivated sesame DIR genes, both independently caused high Fusarium wilt disease susceptibility. A GWAS of 560 sesame accessions combined with an overexpression study confirmed NAC1 and PPO genes play an important role in oil content upregulation in sesame. Collectively, our study provides high-quality genomic resources for cultivated and wild Sesamum species revealing insights to improve the molecular breeding strategies of sesame and other oilseed crops.}, } @article {pmid37798615, year = {2023}, author = {Contreras-Moreira, B and Saraf, S and Naamati, G and Casas, AM and Amberkar, SS and Flicek, P and Jones, AR and Dyer, S}, title = {GET_PANGENES: calling pangenes from plant genome alignments confirms presence-absence variation.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {223}, pmid = {37798615}, issn = {1474-760X}, support = {WT222155/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; }, abstract = {Crop pangenomes made from individual cultivar assemblies promise easy access to conserved genes, but genome content variability and inconsistent identifiers hamper their exploration. To address this, we define pangenes, which summarize a species coding potential and link back to original annotations. The protocol get_pangenes performs whole genome alignments (WGA) to call syntenic gene models based on coordinate overlaps. A benchmark with small and large plant genomes shows that pangenes recapitulate phylogeny-based orthologies and produce complete soft-core gene sets. Moreover, WGAs support lift-over and help confirm gene presence-absence variation. 
Source code and documentation: https://github.com/Ensembl/plant-scripts .}, } @article {pmid37796250, year = {2023}, author = {Jung, H and Lee, D and Lee, S and Kong, HJ and Park, J and Seo, YS}, title = {Comparative genomic analysis of Chryseobacterium species: deep insights into plant-growth-promoting and halotolerant capacities.}, journal = {Microbial genomics}, volume = {9}, number = {10}, pages = {}, doi = {10.1099/mgen.0.001108}, pmid = {37796250}, issn = {2057-5858}, abstract = {Members of the genus Chryseobacterium have attracted great interest as beneficial bacteria that can promote plant growth and biocontrol. Given the recent risks of climate change, it is important to develop tolerance strategies for efficient applications of plant-beneficial bacteria in saline environments. However, the genetic determinants of plant-growth-promoting and halotolerance effects in Chryseobacterium have not yet been investigated at the genomic level. Here, a comparative genomic analysis was conducted with seven Chryseobacterium species. Phylogenetic and phylogenomic analyses revealed niche-specific evolutionary distances between soil and freshwater Chryseobacterium species, consistent with differences in genomic statistics, indicating that the freshwater bacteria have smaller genome sizes and fewer genes than the soil bacteria. Phosphorus- and zinc-cycling genes (required for nutrient acquisition in plants) were universally present in all species, whereas nitrification and sulphite reduction genes (required for nitrogen- and sulphur-cycling, respectively) were distributed only in soil bacteria. A pan-genome containing 6842 gene clusters was constructed, which reflected the general features of the core, accessory and unique genomes. Halotolerant species with an accessory genome shared a Kdp potassium transporter and biosynthetic pathways for branched-chain amino acids and the carotenoid lycopene, which are associated with countermeasures against salt stress. 
Protein-protein interaction network analysis was used to define the genetic determinants of Chryseobacterium salivictor NBC122 that reduce salt damage in bacteria and plants. Sixteen hub genes comprised the aromatic compound degradation and Por secretion systems, which are required to cope with complex stresses associated with saline environments. Horizontal gene transfer and CRISPR-Cas analyses indicated that C. salivictor NBC122 underwent more evolutionary events when interacting with different environments. These findings provide deep insights into genomic adaptation to dynamic interactions between plant-growth-promoting Chryseobacterium and salt stress.}, } @article {pmid37793435, year = {2023}, author = {Raimondeau, P and Bianconi, ME and Pereira, L and Parisod, C and Christin, PA and Dunning, LT}, title = {Lateral gene transfer generates accessory genes that accumulate at different rates within a grass lineage.}, journal = {The New phytologist}, volume = {}, number = {}, pages = {}, doi = {10.1111/nph.19272}, pmid = {37793435}, issn = {1469-8137}, support = {NE/T011025/1//Natural Environment Research Council/ ; NE/V000012/1//Natural Environment Research Council/ ; URF/R/180022//Royal Society/ ; }, abstract = {Lateral gene transfer (LGT) is the movement of DNA between organisms without sexual reproduction. The acquired genes represent genetic novelties that have independently evolved in the donor's genome. Phylogenetic methods have shown that LGT is widespread across the entire grass family, although we know little about the underlying dynamics. We identify laterally acquired genes in five de novo reference genomes from the same grass genus (four Alloteropsis semialata and one Alloteropsis angusta). Using additional resequencing data for a further 40 Alloteropsis individuals, we place the acquisition of each gene onto a phylogeny using stochastic character mapping, and then infer rates of gains and losses. 
We detect 168 laterally acquired genes in the five reference genomes (32-100 per genome). Exponential decay models indicate that the rate of LGT acquisitions (6-28 per Ma) and subsequent losses (11-24% per Ma) varied significantly among lineages. Laterally acquired genes were lost at a higher rate than vertically inherited loci (0.02-0.8% per Ma). This high turnover creates intraspecific gene content variation, with a preponderance of them occurring as accessory genes in the Alloteropsis pangenome. This rapid turnover generates standing variation that can ultimately fuel local adaptation.}, } @article {pmid37791541, year = {2023}, author = {Felgate, H and Sethi, D and Faust, K and Kiy, C and Härtel, C and Rupp, J and Clifford, R and Dean, R and Tremlett, C and Wain, J and Langridge, G and Clarke, P and Page, AJ and Webber, MA}, title = {Characterisation of neonatal Staphylococcus capitis NRCS-A isolates compared with non NRCS-A Staphylococcus capitis from neonates and adults.}, journal = {Microbial genomics}, volume = {9}, number = {10}, pages = {}, doi = {10.1099/mgen.0.001106}, pmid = {37791541}, issn = {2057-5858}, abstract = {Staphylococcus capitis is a frequent cause of late-onset sepsis in neonates admitted to Neonatal Intensive Care Units (NICU). One clone of S. capitis, NRCS-A has been isolated from NICUs globally although the reasons for the global success of this clone are not well understood. We analysed a collection of S. capitis colonising babies admitted to two NICUs, one in the UK and one in Germany as well as corresponding pathological clinical isolates. Genome analysis identified a population structure of three groups; non-NRCS-A isolates, NRCS-A isolates, and a group of 'proto NRCS-A' - isolates closely related to NRCS-A but not associated with neonatal infection. All bloodstream isolates belonged to the NRCS-A group and were indistinguishable from strains carried on the skin or in the gut. 
NRCS-A isolates showed increased tolerance to chlorhexidine and antibiotics relative to the other S. capitis as well as enhanced ability to grow at higher pH values. Analysis of the pangenome of 138 isolates identified characteristic nsr and tarJ genes in both the NRCS-A and proto groups. A CRISPR-cas system was only seen in NRCS-A isolates which also showed enrichment of genes for metal acquisition and transport. We found evidence for transmission of S. capitis NRCS-A within NICU, with related isolates shared between babies and multiple acquisitions by some babies. Our data show NRCS-A strains commonly colonise uninfected babies in NICU representing a potential reservoir for potential infection. This work provides more evidence that adaptation to survive in the gut and on skin facilitates spread of NRCS-A, and that metal acquisition and tolerance may be important to the biology of NRCS-A. Understanding how NRCS-A survives in NICUs can help develop infection control procedures against this clone.}, } @article {pmid37790793, year = {2023}, author = {Shen, F and Hu, C and Huang, X and He, H and Yang, D and Zhao, J and Yang, X}, title = {Advances in alternative splicing identification: deep learning and pantranscriptome.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1232466}, pmid = {37790793}, issn = {1664-462X}, abstract = {In plants, alternative splicing is a crucial mechanism for regulating gene expression at the post-transcriptional level, which leads to diverse proteins by generating multiple mature mRNA isoforms and diversify the gene regulation. Due to the complexity and variability of this process, accurate identification of splicing events is a vital step in studying alternative splicing. This article presents the application of alternative splicing algorithms with or without reference genomes in plants, as well as the integration of advanced deep learning techniques for improved detection accuracy. 
In addition, we also discuss alternative splicing studies in the pan-genomic background and the usefulness of integrated strategies for fully profiling alternative splicing.}, } @article {pmid37790531, year = {2023}, author = {Heumos, S and Guarracino, A and Schmelzle, JM and Li, J and Zhang, Z and Hagmann, J and Nahnsen, S and Prins, P and Garrison, E}, title = {Pangenome graph layout by Path-Guided Stochastic Gradient Descent.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.09.22.558964}, pmid = {37790531}, abstract = {MOTIVATION: The increasing availability of complete genomes demands for models to study genomic variability within entire populations. Pangenome graphs capture the full genetic diversity between multiple genomes, but their layouts may exhibit complex structures due to common, nonlinear patterns of genome variation and evolution. These structures hamper downstream analyses, visualization, and interpretation.

RESULTS: In response, we introduce a novel graph layout algorithm: the Path-Guided Stochastic Gradient Descent (PG-SGD). PG-SGD uses the genomes, represented in the pangenome graph as paths, to move pairs of nodes in parallel applying a modified HOGWILD! strategy. We show that our implementation efficiently computes the layout of gigabase-scale pangenome graphs, unveiling their biological features.

AVAILABILITY: We integrated PG-SGD in ODGI which is released as free software under the MIT open source license. Source code is available at https://github.com/pangenome/odgi .

CONTACT: egarris5@uthsc.edu.}, } @article {pmid37783780, year = {2023}, author = {Huang, Y and He, J and Xu, Y and Zheng, W and Wang, S and Chen, P and Zeng, B and Yang, S and Jiang, X and Liu, Z and Wang, L and Wang, X and Liu, S and Lu, Z and Liu, Z and Yu, H and Yue, J and Gao, J and Zhou, X and Long, C and Zeng, X and Guo, YJ and Zhang, WF and Xie, Z and Li, C and Ma, Z and Jiao, W and Zhang, F and Larkin, RM and Krueger, RR and Smith, MW and Ming, R and Deng, X and Xu, Q}, title = {Pangenome analysis provides insight into the evolution of the orange subfamily and a key gene for citric acid accumulation in citrus fruits.}, journal = {Nature genetics}, volume = {}, number = {}, pages = {}, pmid = {37783780}, issn = {1546-1718}, abstract = {The orange subfamily (Aurantioideae) contains several Citrus species cultivated worldwide, such as sweet orange and lemon. The origin of Citrus species has long been debated and less is known about the Aurantioideae. Here, we compiled the genome sequences of 314 accessions, de novo assembled the genomes of 12 species and constructed a graph-based pangenome for Aurantioideae. Our analysis indicates that the ancient Indian Plate is the ancestral area for Citrus-related genera and that South Central China is the primary center of origin of the Citrus genus. We found substantial variations in the sequence and expression of the PH4 gene in Citrus relative to Citrus-related genera. Gene editing and biochemical experiments demonstrate a central role for PH4 in the accumulation of citric acid in citrus fruits. 
This study provides insights into the origin and evolution of the orange subfamily and a regulatory mechanism underpinning the evolution of fruit taste.}, } @article {pmid37779718, year = {2023}, author = {Yang, W and Yang, H and Bao, X and Hussain, M and Bao, Q and Zeng, Z and Xiao, C and Zhou, L and Qin, X}, title = {Brevibacillus brevis HNCS-1: a biocontrol bacterium against tea plant diseases.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1198747}, pmid = {37779718}, issn = {1664-302X}, abstract = {As a biocontrol bacterium, Brevibacillus has been the subject of extensive research for agricultural applications. Antibacterial peptides (AMPs) are the main antibacterial products of Brevibacillus. This study isolated a strain of Br. brevis HNCS-1 from tea garden soil, and the strain has an antagonistic effect against five types of pathogens of tea diseases, namely Gloeosporium theae-sinensis, Elsinoe leucospira, Phyllosticta theaefolia, Fusarium sp., and Cercospora theae. To determine the genetic characteristics implicated in the biocontrol mechanism, the genome sequence of the HNCS-1 strain was obtained and analyzed further, and the data are deposited in the GenBank repository (No. CP128411). Comparative genomics analyses revealed that the HNCS-1 strain and 17 public Br. brevis share a core genome composed of 3,742 genes. Interestingly, only one non-ribosomal peptide synthetase (NRPS) gene cluster annotated as edeine is present in the core genome. And UHPLC-MS/MS detection results showed that edeine B and edeine A were the principal antibacterial peptides in the HNCS-1 strain. This study proves that edeine is the main antibacterial peptide of Br. 
brevis, and provides a new strategy for the identification of antibacterial products from other biocontrol bacteria.}, } @article {pmid37779688, year = {2023}, author = {González, D and Morales-Olavarria, M and Vidal-Veuthey, B and Cárdenas, JP}, title = {Insights into early evolutionary adaptations of the Akkermansia genus to the vertebrate gut.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1238580}, pmid = {37779688}, issn = {1664-302X}, abstract = {Akkermansia, a relevant mucin degrader from the vertebrate gut microbiota, is a member of the deeply branched Verrucomicrobiota, as well as the only known member of this phylum to be described as inhabitants of the gut. Only a few Akkermansia species have been officially described so far, although there is genomic evidence addressing the existence of more species-level variants for this genus. This niche specialization makes Akkermansia an interesting model for studying the evolution of microorganisms to their adaptation to the gastrointestinal tract environment, including which kind of functions were gained when the Akkermansia genus originated or how the evolutionary pressure functions over those genes. In order to gain more insight into Akkermansia adaptations to the gastrointestinal tract niche, we performed a phylogenomic analysis of 367 high-quality Akkermansia isolates and metagenome-assembled genomes, in addition to other members of Verrucomicrobiota. 
This work was focused on three aspects: the definition of Akkermansia genomic species clusters and the calculation and functional characterization of the pangenome for the most represented species; the evolutionary relationship between Akkermansia and their closest relatives from Verrucomicrobiota, defining the gene families which were gained or lost during the emergence of the last Akkermansia common ancestor (LAkkCA); and the evaluation of the evolutionary pressure metrics for each relevant gene family of main Akkermansia species. This analysis found 25 Akkermansia genomic species clusters distributed in two main clades, divergent from their non-Akkermansia relatives. Pangenome analyses suggest that Akkermansia species have open pangenomes, and the gene gain/loss model indicates that genes associated with mucin degradation (both glycoside hydrolases and peptidases), (micro)aerobic metabolism, surface interaction, and adhesion were part of LAkkCA. Specifically, mucin degradation is a very ancestral innovation involved in the origin of Akkermansia. Horizontal gene transfer detection suggests that Akkermansia could receive genes mostly from unknown sources or from other Gram-negative gut bacteria. Evolutionary metrics suggest that Akkermansia species evolved differently, and even some conserved genes suffered different evolutionary pressures among clades. 
These results suggest a complex evolutionary landscape of the genus and indicate that mucin degradation could be an essential feature in Akkermansia evolution as a symbiotic species.}, } @article {pmid37779189, year = {2023}, author = {Xie, S and Isaacs, K and Becker, G and Murdoch, BM}, title = {A computational framework for improving genetic variants identification from 5,061 sheep sequencing data.}, journal = {Journal of animal science and biotechnology}, volume = {14}, number = {1}, pages = {127}, pmid = {37779189}, issn = {1674-9782}, support = {USDA-NIFA-IDA1566//National Institute of Food and Agriculture/ ; }, abstract = {BACKGROUND: Pan-genomics is a recently emerging strategy that can be utilized to provide a more comprehensive characterization of genetic variation. Joint calling is routinely used to combine identified variants across multiple related samples. However, the improvement of variants identification using the mutual support information from multiple samples remains quite limited for population-scale genotyping.

RESULTS: In this study, we developed a computational framework for joint calling genetic variants from 5,061 sheep by incorporating the sequencing error and optimizing mutual support information from multiple samples' data. The variants were accurately identified from multiple samples by using four steps: (1) Probabilities of variants from two widely used algorithms, GATK and Freebayes, were calculated by Poisson model incorporating base sequencing error potential; (2) The variants with high mapping quality or consistently identified from at least two samples by GATK and Freebayes were used to construct the raw high-confidence identification (rHID) variants database; (3) The high confidence variants identified in single sample were ordered by probability value and controlled by false discovery rate (FDR) using rHID database; (4) To avoid the elimination of potentially true variants from rHID database, the variants that failed FDR were reexamined to rescue potentially true variants and ensure highly accurate variant identification. The results indicated that the percent of concordant SNPs and Indels from Freebayes and GATK after our new method were significantly improved by 12%-32% compared with raw variants and advantageously found low frequency variants of individual sheep involved in several traits including nipples number (GPC5), scrapie pathology (PAPSS2), seasonal reproduction and litter size (GRM1), coat color (RAB27A), and lentivirus susceptibility (TMEM154).

CONCLUSION: The new method used the computational strategy to reduce the number of false positives, and simultaneously improve the identification of genetic variants. This strategy did not incur any extra cost by using any additional samples or sequencing data information and advantageously identified rare variants which can be important for practical applications of animal breeding.}, } @article {pmid37777666, year = {2023}, author = {Rahim, MS and Sharma, V and Yadav, Pragati and Parveen, A and Kumar, A and Roy, J and Kumar, V}, title = {Rethinking underutilized cereal crops: pan-omics integration and green system biology.}, journal = {Planta}, volume = {258}, number = {5}, pages = {91}, pmid = {37777666}, issn = {1432-2048}, abstract = {Due to harsh lifestyle changes, in the present era, nutritional security is needed along with food security so it is necessary to include underutilized cereal crops (UCCs) in our daily diet to counteract the rising danger of human metabolic illness. We can attain both the goal of zero hunger and nutritional security by developing improved UCCs using advanced pan-omics (genomics, transcriptomics, proteomics, metabolomics, nutrigenomics, phenomics and ionomics) practices. Plant sciences research progressed profoundly since the last few decades with the introduction of advanced technologies and approaches, addressing issues of food demand of the growing population, nutritional security challenges and climate change. However, throughout the expansion and popularization of commonly consumed major cereal crops such as wheat and rice, other cereal crops such as millet, rye, sorghum, and others were impeded, despite their potential medicinal and nutraceutical qualities. Undoubtedly neglected underutilized cereal crops (UCCs) also have the capability to withstand diverse climate change. To relieve the burden of major crops, it is necessary to introduce the new crops in our diet in the way of UCCs. 
Introgression of agronomically and nutritionally important traits by pan-omics approaches in UCCs could be a defining moment for the population's well-being on the globe. This review discusses the importance of underutilized cereal crops, as well as the application of contemporary omics techniques and advanced bioinformatics tools that could open up new avenues for future study and be valuable assets in the development and usage of UCCs in the perspective of green system biology. The increased and improved use of UCCs is dependent on number of factors that necessitate a concerted research effort in agricultural sciences. The emergence of functional genomics with molecular genetics might gear toward the reawakening of interest in underutilized cereals crops. The need of this era is to focus on potential UCCs in advanced agriculture and breeding programmes. Hence, targeting the UCCs, might provide a bright future for better health and scientific rationale for its use.}, } @article {pmid37775806, year = {2023}, author = {Bonnet, K and Marschall, T and Doerr, D}, title = {Constructing founder sets under allelic and non-allelic homologous recombination.}, journal = {Algorithms for molecular biology : AMB}, volume = {18}, number = {1}, pages = {15}, pmid = {37775806}, issn = {1748-7188}, support = {1U01HG010973/NH/NIH HHS/United States ; }, abstract = {Homologous recombination between the maternal and paternal copies of a chromosome is a key mechanism for human inheritance and shapes population genetic properties of our species. However, a similar mechanism can also act between different copies of the same sequence, then called non-allelic homologous recombination (NAHR). This process can result in genomic rearrangements---including deletion, duplication, and inversion---and is underlying many genomic disorders. Despite its importance for genome evolution and disease, there is a lack of computational models to study genomic loci prone to NAHR. 
In this work, we propose such a computational model, providing a unified framework for both (allelic) homologous recombination and NAHR. Our model represents a set of genomes as a graph, where haplotypes correspond to walks through this graph. We formulate two founder set problems under our recombination model, provide flow-based algorithms for their solution, describe exact methods to characterize the number of recombinations, and demonstrate scalability to problem instances arising in practice.}, } @article {pmid37773075, year = {2023}, author = {Trinh, P and Clausen, DS and Willis, AD}, title = {happi: a hierarchical approach to pangenomics inference.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {214}, pmid = {37773075}, issn = {1474-760X}, support = {R35 GM133420/GM/NIGMS NIH HHS/United States ; R21 AI168679/AI/NIAID NIH HHS/United States ; T32 ES015459/ES/NIEHS NIH HHS/United States ; }, abstract = {Recovering metagenome-assembled genomes (MAGs) from shotgun sequencing data is an increasingly common task in microbiome studies, as MAGs provide deeper insight into the functional potential of both culturable and non-culturable microorganisms. However, metagenome-assembled genomes vary in quality and may contain omissions and contamination. These errors present challenges for detecting genes and comparing gene enrichment across sample types. To address this, we propose happi, an approach to testing hypotheses about gene enrichment that accounts for genome quality. We illustrate the advantages of happi over existing approaches using published Saccharibacteria MAGs, Streptococcus thermophilus MAGs, and via simulation.}, } @article {pmid37772863, year = {2023}, author = {Lanza, A and Mizobata, H and Yonezawa, R and Yoshitake, K and Shigeharu, K and Asakawa, S}, title = {Complete genome sequence of Edwardsiella sp. 
NBRC12716 isolated in 1962 from the liver of diseased eel.}, journal = {Microbiology resource announcements}, volume = {}, number = {}, pages = {e0073723}, doi = {10.1128/MRA.00737-23}, pmid = {37772863}, issn = {2576-098X}, abstract = {We report the complete genome sequence of Edwardsiella sp. NBRC12716 isolated from a diseased eel in 1962. The genome consists of a single, circular chromosome 3,771,060 bp in length with 59.74% GC content and encodes 25 rRNA, 96 tRNA, and 3,182 protein-coding genes.}, } @article {pmid37764993, year = {2023}, author = {Tamayo-Ordóñez, MC and Rosas-García, NM and Ayil-Gutiérrez, BA and Bello-López, JM and Tamayo-Ordóñez, FA and Anguebes-Franseschi, F and Damas-Damas, S and Tamayo-Ordóñez, YJ}, title = {Non-Structural Proteins (Nsp): A Marker for Detection of Human Coronavirus Families.}, journal = {Pathogens (Basel, Switzerland)}, volume = {12}, number = {9}, pages = {}, doi = {10.3390/pathogens12091185}, pmid = {37764993}, issn = {2076-0817}, abstract = {SARS-CoV-2 was the cause of the global pandemic that caused a total of 14.9 million deaths during the years 2020 and 2021, according to the WHO. The virus presents a mutation rate between $10^{-5}$ and $10^{-3}$ substitutions per nucleotide site per cell infection (s/n/c). Due to this, studies aimed at knowing the evolution of this virus could help us to foresee (through the future development of new detection strategies and vaccines that prevent the infection of this virus in human hosts) that a pandemic caused by this virus will be generated again. In this research, we performed a functional annotation and identification of changes in Nsp (non-structural proteins) domains in the coronavirus genome. The comparison of the 13 selected coronavirus pangenomes demonstrated a total of 69 protein families and 57 functions associated with the structural domain's differentials between genomes. A marked evolutionary conservation of non-structural proteins was observed. 
This allowed us to identify and classify highly pathogenic human coronaviruses into alpha, beta, gamma, and delta groups. The designed Nsp cluster provides insight into the trajectory of SARS-CoV-2, demonstrating that it continues to evolve rapidly. An evolutionary marker allows us to discriminate between phylogenetically divergent groups, viral genotypes, and variants between the alpha and betacoronavirus genera. These types of evolutionary studies provide a window of opportunity to use these Nsp as targets of viral therapies.}, } @article {pmid37761974, year = {2023}, author = {Merkushova, AV and Shikov, AE and Nizhnikov, AA and Antonets, KS}, title = {For Someone, You Are the Whole World: Host-Specificity of Salmonella enterica.}, journal = {International journal of molecular sciences}, volume = {24}, number = {18}, pages = {}, doi = {10.3390/ijms241813670}, pmid = {37761974}, issn = {1422-0067}, support = {MD-2302.2022.5//The Grant of the President of the Russian Federation/ ; }, abstract = {Salmonella enterica is a bacterial pathogen known to cause gastrointestinal infections in diverse hosts, including humans and animals. Despite extensive knowledge of virulence mechanisms, understanding the factors driving host specificity remains limited. In this study, we performed a comprehensive pangenome-wide analysis of S. enterica to identify potential loci determining preference towards certain hosts. We used a dataset of high-quality genome assemblies grouped into 300 reference clusters with a special focus on four host groups: humans, pigs, cattle, and birds. The reconstructed pangenome was shown to be open and enriched with the accessory component implying high genetic diversity. Notably, phylogenetic inferences did not correspond to the distribution of affected hosts, as large compact phylogenetic groups were absent. By performing a pangenome-wide association study, we identified potential host specificity determinants. 
These included multiple genes encoding proteins involved in distinct infection stages, e.g., secretion systems, surface structures, transporters, transcription regulators, etc. We also identified antibiotic resistance loci in host-adapted strains. Functional annotation corroborated the results obtained with significant enrichments related to stress response, antibiotic resistance, ion transport, and surface or extracellular localization. We suggested categorizing the revealed specificity factors into three main groups: pathogenesis, resistance to antibiotics, and propagation of mobile genetic elements (MGEs).}, } @article {pmid37759803, year = {2023}, author = {Naithani, S and Deng, CH and Sahu, SK and Jaiswal, P}, title = {Exploring Pan-Genomes: An Overview of Resources and Tools for Unraveling Structure, Function, and Evolution of Crop Genes and Genomes.}, journal = {Biomolecules}, volume = {13}, number = {9}, pages = {}, doi = {10.3390/biom13091403}, pmid = {37759803}, issn = {2218-273X}, abstract = {The availability of multiple sequenced genomes from a single species made it possible to explore intra- and inter-specific genomic comparisons at higher resolution and build clade-specific pan-genomes of several crops. The pan-genomes of crops constructed from various cultivars, accessions, landraces, and wild ancestral species represent a compendium of genes and structural variations and allow researchers to search for the novel genes and alleles that were inadvertently lost in domesticated crops during the historical process of crop domestication or in the process of extensive plant breeding. Fortunately, many valuable genes and alleles associated with desirable traits like disease resistance, abiotic stress tolerance, plant architecture, and nutrition qualities exist in landraces, ancestral species, and crop wild relatives. 
The novel genes from the wild ancestors and landraces can be introduced back to high-yielding varieties of modern crops by implementing classical plant breeding, genomic selection, and transgenic/gene editing approaches. Thus, pan-genomic represents a great leap in plant research and offers new avenues for targeted breeding to mitigate the impact of global climate change. Here, we summarize the tools used for pan-genome assembly and annotations, web-portals hosting plant pan-genomes, etc. Furthermore, we highlight a few discoveries made in crops using the pan-genomic approach and future potential of this emerging field of study.}, } @article {pmid37759383, year = {2023}, author = {Hafez, M and Gourlie, R and McDonald, M and Telfer, M and Carmona, M and Sautua, F and Moffat, C and Moolhuijzen, P and See, PT and Aboukhaddour, R}, title = {Evolution of the ToxB gene in Pyrenophora tritici-repentis and related species.}, journal = {Molecular plant-microbe interactions : MPMI}, volume = {}, number = {}, pages = {}, doi = {10.1094/MPMI-08-23-0114-FI}, pmid = {37759383}, issn = {0894-0282}, abstract = {Pyrenophora tritici-repentis is a destructive pathogen of wheat with global impact. It possesses a highly plastic open pangenome shaped by the gain and loss of effector genes. This study investigated the allelic variations in the chlorosis-encoding gene, ToxB, across 422 isolates representing all identified pathotypes and worldwide origins. To gain better insights into ToxB evolution, we examined its presence and variability in other Pyrenophora spp. A ToxB haplotype network was constructed, revealing the evolutionary relationships of this gene (20 haplotypes) across four Pyrenophora species. Notably, toxb, the homolog of ToxB, was detected for the first time in the barley pathogen Pyrenophora teres. The ToxB/toxb genes display evidence of selection that is characterized by loss of function, duplication, and diverse mutations. 
Among ToxB/toxb open reading frame, 72 mutations were identified, including 14 synonymous, 55 nonsynonymous, and 3 indel mutations. Remarkably, a ~5.6 Kb Copia-like retrotransposon, named Copia-1_Ptr, was found inserted in the toxb gene of a race 3 isolate. This insert disrupted the ToxB gene's function, a first case of effector gene disruption by a transposable element in Ptr. Additionally, a microsatellite with 25-nucleotide repeats (0 to 10) in the upstream region of ToxB suggested a potential mechanism influencing ToxB expression and regulation. Exploring ToxB-like protein distribution in other Ascomycetes revealed their presence in 19 additional species, including the Leotiomycetes class for the first time. The presence/absence pattern of ToxB-like proteins defied species relatedness compared to a phylogenetic tree, suggesting a past horizontal gene transfer event.}, } @article {pmid37754275, year = {2023}, author = {Ma, J and Zhao, H and Mo, S and Li, J and Ma, X and Tang, Y and Li, H and Liu, Z}, title = {Acquisition of Type I methyltransferase via horizontal gene transfer increases the drug resistance of Aeromonas veronii.}, journal = {Microbial genomics}, volume = {9}, number = {9}, pages = {}, doi = {10.1099/mgen.0.001107}, pmid = {37754275}, issn = {2057-5858}, abstract = {Aeromonas veronii is an opportunistic pathogen that affects both fish and mammals, including humans, leading to bacteraemia, sepsis, meningitis and even death. The increasing virulence and drug resistance of A. veronii are of significant concern and pose a severe risk to public safety. The Type I restriction-modification (RM) system, which functions as a bacterial defence mechanism, can influence gene expression through DNA methylation. However, little research has been conducted to explore its origin, evolutionary path, and relationship to virulence and drug resistance in A. veronii. In this study, we analysed the pan-genome of 233 A. 
veronii strains, and the results indicated that it was 'open', meaning that A. veronii has acquired additional genes from other species. This suggested that A. veronii had the potential to adapt and evolve rapidly, which might have contributed to its drug resistance. One Type I methyltransferase (MTase) and two complete Type I RM systems were identified, namely AveC4I, AveC4II and AveC4III in A. veronii strain C4, respectively. Notably, AveC4I was exclusive to A. veronii C4. Phylogenetic analysis revealed that AveC4I was derived from horizontal gene transfer from Thiocystis violascens and exchanged genes with the human pathogen Comamonas kerstersii. Single molecule real-time sequencing was applied to identify the motif methylated by AveC4I, which was unique and not recognized by any reported MTases in the REBASE database. We also annotated the functions and pathways of the genes containing the motif, revealing that AveC4I may control drug resistance in A. veronii C4. Our findings provide new insight on the mechanisms underlying drug resistance in pathogenic bacteria. By identifying the specific genes and pathways affected by AveC4I, this study may aid in the development of new therapeutic approaches to combat A. veronii infections.}, } @article {pmid37752302, year = {2023}, author = {Woolley, SA and Salavati, M and Clark, EL}, title = {Recent advances in the genomic resources for sheep.}, journal = {Mammalian genome : official journal of the International Mammalian Genome Society}, volume = {}, number = {}, pages = {}, pmid = {37752302}, issn = {1432-1777}, support = {BB/S01540X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/D/10002070/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/S01540X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Sheep (Ovis aries) provide a vital source of protein and fibre to human populations. 
In coming decades, as the pressures associated with rapidly changing climates increase, breeding sheep sustainably as well as producing enough protein to feed a growing human population will pose a considerable challenge for sheep production across the globe. High quality reference genomes and other genomic resources can help to meet these challenges by: (1) informing breeding programmes by adding a priori information about the genome, (2) providing tools such as pangenomes for characterising and conserving global genetic diversity, and (3) improving our understanding of fundamental biology using the power of genomic information to link cell, tissue and whole animal scale knowledge. In this review we describe recent advances in the genomic resources available for sheep, discuss how these might help to meet future challenges for sheep production, and provide some insight into what the future might hold.}, } @article {pmid37750924, year = {2023}, author = {Adhikari, T and Olukolu, B and Paudel, R and Pandey, A and Halterman, D and Louws, F}, title = {Genotyping-by-Sequencing Reveals Population Differentiation and Linkage Disequilibrium in Alternaria linariae from Tomato.}, journal = {Phytopathology}, volume = {}, number = {}, pages = {}, doi = {10.1094/PHYTO-07-23-0229-R}, pmid = {37750924}, issn = {0031-949X}, abstract = {Alternaria linariae (Neerg.) Simmons is an economically important foliar pathogen that causes early blight disease in tomatoes. Understanding genetic diversity, population genetic structure, and evolutionary potential is crucial to contemplating effective disease management strategies. We leveraged genotyping-by-sequencing (GBS) technology to compare genome-wide variation in 124 isolates of Alternaria spp. (A. alternata, A. linariae, and A. solani) for comparative genome analysis and to test the hypotheses of genetic differentiation and linkage disequilibrium (LD) in A. linariae collected from tomatoes in western North Carolina. 
We performed a pangenome-aware variant calling and filtering with GBSapp and identified 53,238 variants conserved across the reference genomes of three Alternaria spp. The highest marker density was observed on chromosome 1 (7 Mb). Both discriminant analysis of principal components (DAPC) and Bayesian model-based STRUCTURE analysis of A. linariae isolates revealed three subpopulations with minimal admixture. The genetic differentiation coefficient (FST) within A. linariae subpopulations were similar and high (0.86), indicating that alleles in the subpopulations are fixed and the genetic structure is likely due to restricted recombination. Analysis of molecular variance indicates higher variation among populations (89%) than within the population (11%). We found long-range LD between pairs of loci in A. linariae, supporting the hypothesis of low recombination expected for a fungal pathogen with limited asexual reproduction. Our findings provide evidence of a high level of population genetic differentiation in A. linariae, which reinforces the importance of developing tomato varieties with broad-spectrum resistance to various isolates of A. linariae.}, } @article {pmid37745608, year = {2023}, author = {Lin, MJ and Iyer, S and Chen, NC and Langmead, B}, title = {Measuring, visualizing and diagnosing reference bias with biastools.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.09.13.557552}, pmid = {37745608}, abstract = {A goal of recent alignment methods is to reduce reference bias, which occurs when reads containing non-reference alleles fail to align to their true point of origin. However, there is a lack of methods for systematically measuring, categorizing, and diagnosing reference bias. We present biastools, which analyzes and categorizes instances of reference bias. Biastools has different sets of functionality tailored to different scenarios, i.e. 
(a) when the donor genome is well-characterized and input reads are simulated, (b) when the donor is well-characterized and reads are real, and (c) when the donor is not well-characterized and reads are real. When possible, biastools divides instances of reference bias into categories according to their cause: bias due to loss, flux, or local misalignment. Biastools's scan mode detects large-scale mapping artifacts due to structural variation and flaws in the reference representation. Our findings confirm that including more variants in a graph genome alignment method results in fewer reference biases. We also find that end-to-end alignment modes are effective in reducing bias at insertions and deletions, compared to local aligners that allow soft clipping. Finally, we use biastools to characterize the ways in which using the new telomere-to-telomere human reference can improve bias at a large scale. In short, biastools is a tool uniquely focused on reference bias, making it a valuable resource as the field continues to develop new aligners and pangenome representations to reduce bias.}, } @article {pmid37744919, year = {2023}, author = {Deng, Y and Mou, T and Wang, J and Su, J and Yan, Y and Zhang, YQ}, title = {Characterization of three rapidly growing novel Mycobacterium species with significant polycyclic aromatic hydrocarbon bioremediation potential.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1225746}, pmid = {37744919}, issn = {1664-302X}, abstract = {Mycobacterium species exhibit high bioremediation potential for the degradation of polycyclic aromatic hydrocarbons (PAHs) that are significant environmental pollutants. In this study, three Gram-positive, rapidly growing strains (YC-RL4[T], MB418[T], and HX176[T]) were isolated from petroleum-contaminated soils and were classified as Mycobacterium within the family Mycobacteriaceae. 
Genomic average nucleotide identity (ANI; < 95%) and digital DNA-DNA hybridization (dDDH; < 70%) values relative to other Mycobacterium spp. indicated that the strains represented novel species. The morphological, physiological, and chemotaxonomic characteristics of the isolates also supported their affiliation with Mycobacterium and their delineation as novel species. The strains were identified as Mycobacterium adipatum sp. nov. (type strain YC-RL4[T] = CPCC 205684[T] = CGMCC 1.62027[T]), Mycobacterium deserti sp. nov. (type strain MB418[T] = CPCC 205710[T] = KCTC 49782[T]), and Mycobacterium hippophais sp. nov. (type strain HX176[T] = CPCC 205372[T] = KCTC 49413[T]). Genes encoding enzymes involved in PAH degradation and metal resistance were present in the genomes of all three strains. Specifically, genes encoding alpha subunits of aromatic ring-hydroxylating dioxygenases were encoded by the genomes. The genes were also identified as core genes in a pangenomic analysis of the three strains along with 70 phylogenetically related mycobacterial strains that were previously classified as Mycolicibacterium. Notably, strain YC-RL4[T] could not only utilize phthalates as their sole carbon source for growth, but also convert di-(2-ethylhexyl) phthalate into phthalic acid. 
These results indicated that strains YC-RL4[T], MB418[T], and HX176[T] were important resources with significant bioremediation potential in soils contaminated by PAHs and heavy metals.}, } @article {pmid37740204, year = {2023}, author = {Bhattacharya, A and Das, S and Bhattacharjee, MJ and Mukherjee, AK and Khan, MR}, title = {Comparative pangenomic analysis of predominant human vaginal lactobacilli strains towards population-specific adaptation: understanding the role in sustaining a balanced and healthy vaginal microenvironment.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {565}, pmid = {37740204}, issn = {1471-2164}, mesh = {Humans ; Female ; *Lactobacillus/genetics ; Phylogeny ; *Bacteriocins/genetics ; Biological Evolution ; Cell Membrane ; }, abstract = {The vaginal microenvironment of healthy women has a predominance of Lactobacillus crispatus, L. iners, L. gasseri, and L. jensenii. The genomic repertoire of the strains of each of the species associated with the key attributes thereby regulating a healthy vaginal environment needs a substantial understanding. We studied all available human strains of the four lactobacilli across different countries, isolated from vaginal and urinal sources through phylogenetic and pangenomic approaches. The findings showed that L. iners has the highest retention of core genes, and L. crispatus has more gene gain in the evolutionary stratum. Interestingly, L. gasseri and L. jensenii demonstrated major population-specific gene-cluster gain/loss associated with bacteriocin synthesis, iron chelating, adherence, zinc and ATP binding proteins, and hydrolase activity. Gene ontology enrichment analysis revealed that L. crispatus strains showed greater enrichment of functions related to plasma membrane integrity, biosurfactant, hydrogen peroxide synthesis, and iron sequestration as an ancestral derived core function, while bacteriocin and organic acid biosynthesis are strain-specific accessory enriched functions. 
L. jensenii showed greater enrichment of functions related to adherence, aggregation, and exopolysaccharide synthesis. Notably, the key functionalities are heterogeneously enriched in some specific strains of L. iners and L. gasseri. This study shed light on the genomic features and their variability that provides advantageous attributes to predominant vaginal Lactobacillus species maintaining vaginal homeostasis. These findings evoke the need to consider region-specific candidate strains of Lactobacillus to formulate prophylactic measures against vaginal dysbiosis for women's health.}, } @article {pmid37738420, year = {2023}, author = {Welgemoed, T and Duong, TA and Barnes, I and Stukenbrock, EH and Berger, DK}, title = {Population genomic analyses suggest recent dispersal events of the pathogen Cercospora zeina into East and Southern African maize cropping systems.}, journal = {G3 (Bethesda, Md.)}, volume = {}, number = {}, pages = {}, doi = {10.1093/g3journal/jkad214}, pmid = {37738420}, issn = {2160-1836}, abstract = {A serious factor hampering global maize production is gray leaf spot disease. Cercospora zeina is one of the causative pathogens, but population genomics analysis of C. zeina is lacking. We conducted whole-genome Illumina sequencing of a representative set of 30 C. zeina isolates from Kenya and Uganda (East Africa) and Zambia, Zimbabwe and South Africa (Southern Africa). Selection of the diverse set was based on microsatellite data from a larger collection of the pathogen. Pangenome analysis of the C. zeina isolates was done by (i) de novo assembly of the reads with SPAdes, (ii) annotation with BRAKER, and (iii) protein clustering with OrthoFinder. A published long-read assembly of C. zeina (CMW25467) from Zambia was included and annotated using the same pipeline. This analysis revealed 790 non-shared accessory and 10,677 shared core orthogroups (genes) between the 31 isolates. 
Accessory gene content was largely shared between isolates from all countries, with a few genes unique to populations from Southern Africa (32) or East Africa (6). There was a significantly higher proportion of effector genes in the accessory secretome (44%) compared to the core secretome (24%). PCA, ADMIXTURE, and phylogenetic analysis using a neighbour-net network indicated a population structure with a geographical subdivision between the East African isolates and the Southern African isolates, although gene flow was also evident. The small pangenome and partial population differentiation indicated recent dispersal of C. zeina into Africa, possibly from two regional founder populations, followed by recurrent gene flow owing to widespread maize production across sub-Saharan Africa.}, } @article {pmid37736763, year = {2023}, author = {Peña-Montenegro, TD and Kleindienst, S and Allen, AE and Eren, AM and McCrow, JP and Sánchez-Calderón, JD and Arnold, J and Joye, SB}, title = {Species-specific responses of marine bacteria to environmental perturbation.}, journal = {ISME communications}, volume = {3}, number = {1}, pages = {99}, pmid = {37736763}, issn = {2730-6151}, support = {ECOGIG-2//Gulf of Mexico Research Initiative (GoMRI)/ ; Ecogig-2//Gulf of Mexico Research Initiative (GoMRI)/ ; Ecogig-2//Gulf of Mexico Research Initiative (GoMRI)/ ; Ecogig-2//Gulf of Mexico Research Initiative (GoMRI)/ ; }, abstract = {Environmental perturbations shape the structure and function of microbial communities. Oil spills are a major perturbation and resolving spills often requires active measures like dispersant application that can exacerbate the initial disturbance. Species-specific responses of microorganisms to oil and dispersant exposure during such perturbations remain largely unknown. 
We merged metatranscriptomic libraries with pangenomes to generate Core-Accessory Metatranscriptomes (CA-Metatranscriptomes) for two microbial hydrocarbon degraders that played important roles in the aftermath of the Deepwater Horizon oil spill. The Colwellia CA-Metatranscriptome illustrated pronounced dispersant-driven acceleration of core (~41%) and accessory gene (~59%) transcription, suggesting an opportunistic strategy. Marinobacter responded to oil exposure by expressing mainly accessory genes (~93%), suggesting an effective hydrocarbon-degrading lifestyle. The CA-Metatranscriptome approach offers a robust way to identify the underlying mechanisms of key microbial functions and highlights differences of specialist-vs-opportunistic responses to environmental disturbance.}, } @article {pmid37732781, year = {2023}, author = {Dong, X and Yu, Y and Liu, J and Cao, D and Xiang, Y and Bi, K and Yuan, X and Li, S and Wu, T and Zhang, Y}, title = {Whole-genome sequencing provides insights into a novel species: Providencia hangzhouensis associated with urinary tract infections.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0122723}, doi = {10.1128/spectrum.01227-23}, pmid = {37732781}, issn = {2165-0497}, abstract = {Providencia rettgeri is a clinically significant opportunistic pathogen that is involved in urinary tract infections. Due to the resolution limitations of identification, distinguishing P. rettgeri from closely related species is challenging by commercial biochemical test systems. Here, we first reported a novel species, Providencia hangzhouensis, which had been misidentified as P. rettgeri. Exhibiting ≤91.97% average nucleotide identity (ANI) and ≤46.10% in silico DNA-DNA hybridization values with all known Providencia species, P. hangzhouensis falls well beneath the established species-defining thresholds. We conducted a population genomics analysis of P. hangzhouensis isolates worldwide. Our study revealed that P. 
hangzhouensis has emerged in many countries and has formed several transmission clusters. We found that P. hangzhouensis shared the highest ANI values (91.54% and 91.97%) with P. rettgeri and P. huaxiensis, respectively. The pan-genome analysis revealed that these three species possessed a similar component of pan-genomes. Two genes associated with metabolism, folE2 and ccmM, were identified to be specific to P. hangzhouensis. Furthermore, we also observed that carbapenem-resistance genes frequently occur in P. hangzhouensis with the blaIMP-27 being the most prevalent (46.15%; 36/78). The emergence of P. hangzhouensis is often accompanied by extended-spectrum β-lactamase and carbapenem-resistance genes, and calls for tailored surveillance of this species as a clinically relevant species in the future. IMPORTANCE Our study has identified and characterized a novel species, Providencia hangzhouensis, which is associated with urinary tract infections and was previously misidentified as Providencia rettgeri. Through this study, we have identified specific genes unique to P. hangzhouensis, which could serve as marker genes for rapid PCR identification. Additionally, our findings suggest that the emergence of P. 
hangzhouensis is often accompanied by extended-spectrum β-lactamase and carbapenem-resistance genes, emphasizing the need for attention to clinical management and the importance of accurate species identification and proper drug use.}, } @article {pmid37728044, year = {2023}, author = {Lin, Y and Zhu, Y and Cui, Y and Qian, H and Yuan, Q and Chen, R and Lin, Y and Chen, J and Zhou, X and Shi, C and He, H and Hu, T and Gu, C and Yu, X and Zhu, X and Wang, Y and Qian, Q and Zhang, C and Wang, F and Shang, L}, title = {Identification of natural allelic variation in TTL1 controlling thermotolerance and grain size by a rice super pan-genome.}, journal = {Journal of integrative plant biology}, volume = {}, number = {}, pages = {}, doi = {10.1111/jipb.13568}, pmid = {37728044}, issn = {1744-7909}, abstract = {Continuously increasing global temperatures present great challenges to food security. Grain size, one of the critical components determining grain yield in rice (Oryza sativa L.), is a prime target for genetic breeding. Thus, there is an immediate need for genetic improvement in rice to maintain grain yield under heat stress. However, quantitative trait loci (QTLs) endowing heat stress tolerance and grain size in rice are extremely rare. Here, we identified a novel negative regulator with pleiotropic effects, Thermo-Tolerance and grain Length 1 (TTL1), from the super pan-genomic and transcriptomic data. Loss-of-function mutations in TTL1 enhanced heat tolerance, and caused an increase in grain size by coordinating cell expansion and proliferation. TTL1 was shown to function as a transcriptional regulator and localized to the nucleus and cell membrane. Furthermore, haplotype analysis showed that hap [L] and hap [S] of TTL1 were obviously correlated with variations of thermotolerance and grain size in a core collection of cultivars. 
Genome evolution analysis of available rice germplasms suggested that TTL1 was selected during domestication of the indica and japonica rice subspecies, but still had much breeding potential for increasing grain length and thermotolerance. These findings provide insights into TTL1 as a novel potential target for development of high-yield and thermotolerant rice varieties. This article is protected by copyright. All rights reserved.}, } @article {pmid37727231, year = {2023}, author = {Rios Galicia, B and Sáenz, JS and Yergaliyev, T and Camarinha-Silva, A and Seifert, J}, title = {Host specific adaptations of Ligilactobacillus aviarius to poultry.}, journal = {Current research in microbial sciences}, volume = {5}, number = {}, pages = {100199}, pmid = {37727231}, issn = {2666-5174}, abstract = {The genus Ligilactobacillus encompasses species adapted to vertebrate hosts and fermented food. Their genomes encode adaptations to the host lifestyle. Reports of gut microbiota from chicken and turkey gastrointestinal tract have shown a high persistence of Ligilactobacillus aviarius along the digestive system compared to other species found in the same host. However, its adaptations to poultry as a host has not yet been described. In this work, the pan-genome of Ligilactobacillus aviarius was explored to describe the functional adaptability to the gastrointestinal environment. The core genome is composed of 1179 gene clusters that are present at least in one copy that codifies to structural, ribosomal and biogenesis proteins. The rest of the identified regions were classified into three different functional clusters of orthologous groups (clusters) that codify carbohydrate metabolism, envelope biogenesis, viral defence mechanisms, and mobilome inclusions. The pan-genome of Ligilactobacillus aviarius is a closed pan-genome, frequently found in poultry and highly prevalent across chicken faecal samples. The genome of L. 
aviarius codifies different clusters of glycoside hydrolases and glycosyltransferases that mediate interactions with the host cells. Accessory features, such as antiviral mechanisms and prophage inclusions, variate amongst strains from different GIT sections. This information provides hints about the interaction of this species with viral particles and other bacterial species. This work highlights functional adaptability traits present in L. aviarius that make it a dominant key member of the poultry gut microbiota and enlightens the convergent ecological relation of this species to the poultry gut environment.}, } @article {pmid37722405, year = {2023}, author = {Low, SJ and O'Neill, MT and Kerry, WJ and Krysiak, M and Papadakis, G and Whitehead, LW and Savic, I and Prestedge, J and Williams, L and Cooney, JP and Tran, T and Lim, CK and Caly, L and Towns, JM and Bradshaw, CS and Fairley, C and Chow, EPF and Chen, MY and Pellegrini, M and Pasricha, S and Williamson, DA}, title = {Rapid detection of monkeypox virus using a CRISPR-Cas12a mediated assay: a laboratory validation and evaluation study.}, journal = {The Lancet. Microbe}, volume = {}, number = {}, pages = {}, doi = {10.1016/S2666-5247(23)00148-9}, pmid = {37722405}, issn = {2666-5247}, abstract = {BACKGROUND: The 2022 outbreak of mpox (formerly known as monkeypox) led to the spread of monkeypox virus (MPXV) in over 110 countries, demanding effective disease management and surveillance. As current diagnostics rely largely on centralised laboratory testing, our objective was to develop a simple rapid point-of-care assay to detect MPXV in clinical samples using isothermal amplification coupled with CRISPR and CRISPR-associated protein (Cas) technology.

METHODS: In this proof-of-concept study, we developed a portable isothermal amplification CRISPR-Cas12a-based assay for the detection of MPXV. We designed a panel of 22 primer-guide RNA sets using pangenome and gene-agnostic approaches, and subsequently shortlisted the three sets producing the strongest signals for evaluation of analytical sensitivity and specificity using a fluorescence-based readout. The set displaying 100% specificity and the lowest limit of detection (LOD) was selected for further assay validation using both a fluorescence-based and lateral-flow readout. Assay specificity was confirmed using a panel of viral and bacterial pathogens. Finally, we did a blind concordance study on genomic DNA extracted from 185 clinical samples, comparing assay results with a gold-standard quantitative PCR (qPCR) assay. We identified the optimal time to detection and analysed the performance of the assay relative to qPCR using receiver operating characteristic (ROC) curves. We also assessed the compatibility with lateral-flow strips, both visually and computationally, where strips were interpreted blinded to the fluorescence results on the basis of the presence or absence of test bands.

FINDINGS: With an optimal run duration of approximately 45 min from isothermal amplification to CRISPR-assay readout, the MPXV recombinase polymerase amplification CRISPR-Cas12a-based assay with the selected primer-guide set had an LOD of 1 copy per μL and 100% specificity against tested viral pathogens. Blinded concordance testing of 185 clinical samples resulted in 100% sensitivity (95% CI 89·3-100) and 99·3% specificity (95% CI 95·7-100) using the fluorescence readout. For optimal time to detection by fluorescence readout, we estimated the areas under the ROC curve to be 0·98 at 2 min and 0·99 at 4 min. Lateral-flow strips had 100% sensitivity (89·3-100) and 98·6% specificity (94·7-100) with both visual and computational assessment. Overall, lateral-flow results were highly concordant with fluorescence-based readouts (179 of 185 tests, 96·8% concordant), with discrepancies associated with low viral load samples.

INTERPRETATION: Our assay for the diagnosis of mpox displayed good performance characteristics compared with qPCR. Although optimisation of the assay will be required before deployment, its usability and versatility present a potential solution to MPXV detection in low-resource and remote settings, as well as a means of community-based, on-site testing.

FUNDING: Victorian Medical Research Accelerator Fund and the Australian Government Department of Health.}, } @article {pmid37714713, year = {2023}, author = {Dai, X and Bian, P and Hu, D and Luo, F and Huang, Y and Jiao, S and Wang, X and Gong, M and Li, R and Cai, Y and Wen, J and Yang, Q and Deng, W and Nanaei, HA and Wang, Y and Wang, F and Zhang, Z and Rosen, BD and Heller, R and Jiang, Y}, title = {A Chinese indicine pangenome reveals a wealth of novel structural variants introgressed from other Bos species.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.277481.122}, pmid = {37714713}, issn = {1549-5469}, abstract = {Chinese indicine cattle harbor a much higher genetic diversity compared with other domestic cattle, but their genome architecture remains uninvestigated. Using PacBio HiFi sequencing data from 10 Chinese indicine cattle across southern China, we assembled 20 high-quality partially phased genomes and integrated them into a multiassembly graph containing 148.5 Mb (5.6%) of novel sequence. We identified 156,009 high-confidence nonredundant structural variants (SVs) and 206 SV hotspots spanning ∼195 Mb of gene-rich sequence. We detected 34,249 archaic introgressed fragments in Chinese indicine cattle covering 1.93 Gb (73.3%) of the genome. We inferred an average of 3.8%, 3.2%, 1.4%, and 0.5% of introgressed sequence originating, respectively, from banteng-like, kouprey-like, gayal-like, and gaur-like Bos species, as well as 0.6% of unknown origin. Introgression from multiple donors might have contributed to the genetic diversity of Chinese indicine cattle. 
Altogether, this study highlights the contribution of interspecies introgression to the genomic architecture of an important livestock population and shows how exotic genomic elements can contribute to the genetic variation available for selection.}, } @article {pmid37710263, year = {2023}, author = {Zhu, Q and Dovletgeldiyev, A and Shen, C and Li, K and Hu, S and He, Z}, title = {Comparative genomic analysis of Fusobacterium nucleatum reveals high intra-species diversity and cgmlst marker construction.}, journal = {Gut pathogens}, volume = {15}, number = {1}, pages = {43}, pmid = {37710263}, issn = {1757-4749}, abstract = {BACKGROUND: Fusobacterium nucleatum is one of the most important anaerobic opportunistic pathogens in the oral and intestinal tracts of humans and animals. It can cause various diseases such as infections, Lemierre's syndrome, oral cancer and colorectal cancer. Comparative genomic studies at the population genome level have not been reported.

RESULTS: We analyzed all publicly available Fusobacterium nucleatum genomic data for a comparative genomic study, focusing on the pan-genomic features, virulence genes, and plasmid genomes, and developed cgmlst molecular markers. We found that the pan-genome shows a clear open tendency and that most plasmids in Fusobacterium nucleatum are mainly transmitted intraspecifically.

CONCLUSIONS: Our comparative analysis of Fusobacterium nucleatum systematically revealed the open pan-genomic features and phylogenetic tree based on cgmlst molecular markers. What's more, we also identified common plasmid typing among genomes. We hope that our study will provide a theoretical basis for subsequent functional studies.}, } @article {pmid37710174, year = {2023}, author = {Mahboob, S and Ullah, N and Farhan Ul Haque, M and Rauf, W and Iqbal, M and Ali, A and Rahman, M}, title = {Genomic characterization and comparative genomic analysis of HS-associated Pasteurella multocida serotype B:2 strains from Pakistan.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {546}, pmid = {37710174}, issn = {1471-2164}, support = {NRPU-7254//Higher Education Commision, Pakistan/ ; }, mesh = {Animals ; Cattle ; Pakistan ; *Pasteurella multocida/genetics ; Serogroup ; *Hemorrhagic Septicemia/veterinary ; Genomics ; Buffaloes ; }, abstract = {BACKGROUND: Haemorrhagic septicaemia (HS) is a highly fatal and predominant disease in livestock, particularly cattle and buffalo in the tropical regions of the world. Pasteurella multocida (P. multocida), serotypes B:2 and E:2, are reported to be the main causes of HS wherein serotype B:2 is more common in Asian countries including Pakistan and costs heavy financial losses every year. As yet, very little molecular and genomic information related to the HS-associated serotypes of P. multocida isolated from Pakistan is available. Therefore, this study aimed to explore the characteristics of novel bovine isolates of P. multocida serotype B:2 at the genomic level and perform comparative genomic analysis of various P. multocida strains from Pakistan to better understand the genetic basis of pathogenesis and virulence.

RESULTS: To understand the genomic variability and pathogenomics, we characterized three HS-associated P. multocida serotype B:2 strains isolated from the Faisalabad (PM1), Peshawar (PM2) and Okara (PM3) districts of Punjab, Pakistan. Together with the other nine publicly available Pakistani-origin P. multocida strains and a reference strain Pm70, a comparative genomic analysis was performed. The sequenced strains were characterized as serotype B and belong to ST-122. The strains contain no plasmids; however, each strain contains at least two complete prophages. The pan-genome analysis revealed a higher number of core genes indicating a close resemblance to the studied genomes and very few genes (1%) of the core genome serve as a part of virulence, disease, and defense mechanisms. We further identified that studied P. multocida B:2 strains harbor common antibiotic resistance genes, specifically PBP3 and EF-Tu. Remarkably, the distribution of virulence factors revealed that OmpH and plpE were not present in any P. multocida B:2 strains while the presence of these antigens was reported uniformly in all serotypes of P. multocida.

CONCLUSION: This study's findings indicate the absence of OmpH and PlpE in the analyzed P. multocida B:2 strains, which are known surface antigens and provide protective immunity against P. multocida infection. The availability of additional genomic data on P. multocida B:2 strains from Pakistan will facilitate the development of localized therapeutic agents and rapid diagnostic tools specifically targeting HS-associated P. multocida B:2 strains.}, } @article {pmid37695773, year = {2023}, author = {Le Naour-Vernet, M and Charriat, F and Gracy, J and Cros-Arteil, S and Ravel, S and Veillet, F and Meusnier, I and Padilla, A and Kroj, T and Cesari, S and Gladieux, P}, title = {Adaptive evolution in virulence effectors of the rice blast fungus Pyricularia oryzae.}, journal = {PLoS pathogens}, volume = {19}, number = {9}, pages = {e1011294}, pmid = {37695773}, issn = {1553-7374}, mesh = {Virulence/genetics ; Amino Acid Sequence ; *Amino Acids ; *Ascomycota/genetics ; }, abstract = {Plant pathogens secrete proteins called effectors that target host cellular processes to promote disease. Recently, structural genomics has identified several families of fungal effectors that share a similar three-dimensional structure despite remarkably variable amino-acid sequences and surface properties. To explore the selective forces that underlie the sequence variability of structurally-analogous effectors, we focused on MAX effectors, a structural family of effectors that are major determinants of virulence in the rice blast fungus Pyricularia oryzae. Using structure-informed gene annotation, we identified 58 to 78 MAX effector genes per genome in a set of 120 isolates representing seven host-associated lineages. The expression of MAX effector genes was primarily restricted to the early biotrophic phase of infection and strongly influenced by the host plant. 
Pangenome analyses of MAX effectors demonstrated extensive presence/absence polymorphism and identified gene loss events possibly involved in host range adaptation. However, gene knock-in experiments did not reveal a strong effect on virulence phenotypes suggesting that other evolutionary mechanisms are the main drivers of MAX effector losses. MAX effectors displayed high levels of standing variation and high rates of non-synonymous substitutions, pointing to widespread positive selection shaping the molecular diversity of MAX effectors. The combination of these analyses with structural data revealed that positive selection acts mostly on residues located in particular structural elements and at specific positions. By providing a comprehensive catalog of amino acid polymorphism, and by identifying the structural determinants of the sequence diversity, our work will inform future studies aimed at elucidating the function and mode of action of MAX effectors.}, } @article {pmid37695632, year = {2023}, author = {Naveed, M and Mahmood, S and Aziz, T and Azeem, A and Hussain, I and Waseem, M and Ali, A and Alharbi, M and Alshammari, A and Alasmari, AF}, title = {Designing a novel chimeric multi-epitope vaccine subunit against Staphylococcus argenteus through artificial intelligence approach integrating pan-genome analysis, in vitro identification, and immunogenicity profiling.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-16}, doi = {10.1080/07391102.2023.2256881}, pmid = {37695632}, issn = {1538-0254}, abstract = {Staphylococcus argenteus is a newly identified pathogen that causes respiratory tract infections, skin infections, such as cellulitis, abscesses, and impetigo, and currently, there is no licensed vaccine available against it. To develop a vaccine against S. argenteus, a bacterial pan-genome analysis was applied to identify potential vaccine candidates. 
A total of 4908 core proteins were retrieved and utilized for identifying four proteins, including SG38 Panton-Valentine leukocidin LukS-PV protein, SG62 staphylococcal enterotoxin type A protein, SG39 enterotoxin B protein, and SG43 enterotoxin type C3 protein as potential vaccine candidates. Epitopes were predicted for these proteins using different types of B and T-cell epitope prediction tools, and only those that were non-toxic, antigenic, non-allergenic, and immunogenic were selected. The selected epitopes were linked to each other to form a multi-epitope vaccine construct, which was further linked to the PADRE sequence (AKFVAAWTLKAAA) and 50s ribosomal L7/L12 protein to enhance the vaccine's antigenicity. The three-dimensional structure of the vaccine construct was assessed to determine its binding affinity with key Toll-like receptor 9 (TLR-9) and Toll-like receptor 5 (TLR-5) immune cell receptors. Our findings demonstrate that the vaccine exhibits favorable binding interactions with these immune cell receptors, indicating its potential efficacy. Molecular dynamic simulations further confirmed the accessibility of vaccine epitopes to the host immune system, substantiating its ability to elicit protective immune responses. Taken together, this study highlights the promising candidacy of the modeled vaccine construct for future in vivo and in vitro experimental investigations. Communicated by Ramaswamy H. 
Sarma.}, } @article {pmid37692398, year = {2023}, author = {Villacís, JE and Castelán-Sánchez, HG and Rojas-Vargas, J and Rodríguez-Cruz, UE and Albán, V and Reyes, JA and Meza-Rodríguez, PM and Dávila-Ramos, S and Villavicencio, F and Galarza, M and Gestal, MC}, title = {Emergence of Raoultella ornithinolytica in human infections from different hospitals in Ecuador with OXA-48-producing resistance.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1216008}, pmid = {37692398}, issn = {1664-302X}, abstract = {PURPOSE: The purpose of this study was to highlight the clinical and molecular features of 13 Raoultella ornithinolytica strains isolated from clinical environments in Ecuador, and to perform comparative genomics with previously published genomes of Raoultella spp. As Raoultella is primarily found in environmental, clinical settings, we focused our work on identifying mechanisms of resistance that can provide this bacterium an advantage to establish and persist in hospital environments.

METHODS: We analyzed 13 strains of Raoultella ornithinolytica isolated from patients with healthcare associated infections (HAI) in three hospitals in Quito and one in Santo Domingo de Los Tsáchilas, Ecuador, between November 2017 and April 2018. These isolates were subjected to phenotypic antimicrobial susceptibility testing, end-point polymerase chain reaction (PCR) to detect the presence of carbapenemases and whole-genome sequencing.

RESULTS: Polymerase chain reaction revealed that seven isolates were positive for the blaOXA-48 gene and one for the blaKPC-2 gene. Of the seven strains that presented the blaOXA-48 gene, six harbored it on an IncFII plasmid, and in one it was inserted into the bacterial chromosome. The blaKPC gene was detected in an IncM2/IncR plasmid. From the bioinformatics analysis, nine genomes had the gene blaOXA-48, originating from Ecuador. Moreover, all R. ornithinolytica strains contained the ORN-1 gene, which confers resistance to β-lactams, such as penicillins and cephalosporins. Comparative genome analysis of the strains showed that the pangenome of R. ornithinolytica is considered an open pangenome, with 27.77% of core genes, which could be explained by the fact that the antibiotic resistance genes in the ancestral reconstruction are relatively new, suggesting that this genome is constantly incorporating new genes.

CONCLUSION: These results reveal the genome plasticity of R. ornithinolytica, particularly in acquiring antibiotic-resistance genes. The genomic surveillance and infectious control of these uncommon species are important since they may contribute to the burden of antimicrobial resistance and human health.}, } @article {pmid37690289, year = {2023}, author = {Sarker, P and Mitro, A and Hoque, H and Hasan, MN and Nurnabi Azad Jewel, GM}, title = {Identification of potential novel therapeutic drug target against Elizabethkingia anophelis by integrative pan and subtractive genomic analysis: An in silico approach.}, journal = {Computers in biology and medicine}, volume = {165}, number = {}, pages = {107436}, doi = {10.1016/j.compbiomed.2023.107436}, pmid = {37690289}, issn = {1879-0534}, abstract = {Elizabethkingia anophelis is a human pathogen responsible for severe nosocomial infections in neonates and immunocompromised patients. The significantly higher mortality rate from E. anophelis infections and the lack of available regimens highlight the critical need to explore novel drug targets. The current study investigated effective novel drug targets by employing a comprehensive in silico subtractive genomic approach integrated with pangenomic analysis of E. anophelis strains. A total of 2809 core genomic proteins were found by pangenomic analysis of non-paralogous proteins. Subsequently, 156 pathogen-specific, 442 choke point, 202 virulence factor, 53 antibiotic resistant and 119 host-pathogen interacting proteins were identified in E. anophelis. By subtractive genomic approach, at first 791 proteins were found to be indispensable for the survival of E. anophelis. 558 and 315 proteins were detected as non-homologous to human and gut microflora respectively. Following that 245 cytoplasmic, 245 novel, and 23 broad-spectrum targets were selected and finally four proteins were considered as potential therapeutic targets of E. 
anophelis based on highest degree score in PPI network. Among those, three proteins were subjected to molecular docking and subsequent MD simulation as one protein did not contain a plausible binding pocket with sufficient surface area and volume. All the complexes were found to be stable and compact in 100 ns molecular dynamics simulation studies as measured by RMSD, RMSF, and Rg. These three short-listed targets identified in this study may lead to the development of novel antimicrobials capable of curing infections and pave the way to prevent and control the disease progression caused by the deadly agent E. anophelis.}, } @article {pmid37684624, year = {2023}, author = {Nageeb, WM and Hetta, HF}, title = {Pangenome analysis of Corynebacterium striatum: insights into a neglected multidrug-resistant pathogen.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {252}, pmid = {37684624}, issn = {1471-2180}, mesh = {*Corynebacterium/genetics ; *Genomics ; Multigene Family ; Anti-Bacterial Agents/pharmacology ; Prophages/genetics ; }, abstract = {BACKGROUND: Over the past two decades, Corynebacterium striatum has been increasingly isolated from clinical cultures with most isolates showing increased antimicrobial resistance (AMR) to last resort agents. Advances in the field of pan genomics would facilitate the understanding of the clinical significance of such bacterial species previously thought to be among commensals paving the way for identifying new drug targets and control strategies.

METHODS: We constructed a pan-genome using 310 genome sequences of C. striatum. Pan-genome analysis was performed using three tools including Roary, PIRATE, and PEPPAN. AMR genes and virulence factors have been studied in relation to core genome phylogeny. Genomic Islands (GIs), Integrons, and Prophage regions have been explored in detail.

RESULTS: The pan-genome ranges between a total of 5253-5857 genes with 2070 - 1899 core gene clusters. Some antimicrobial resistance genes have been identified in the core genome portion, but most of them were located in the dispensable genome. In addition, some well-known virulence factors described in pathogenic Corynebacterium species were located in the dispensable genome. A total of 115 phage species have been identified with only 44 intact prophage regions.

CONCLUSION: This study presents a detailed comparative pangenome report of C. striatum. The species show a very slowly growing pangenome with relatively high number of genes in the core genome contributing to lower genomic variation. Prophage elements carrying AMR and virulence elements appear to be infrequent in the species. GIs appear to offer a prominent role in mobilizing antibiotic resistance genes in the species and integrons occur at a frequency of 50% in the species. Control strategies should be directed against virulence and resistance determinants carried on the core genome and those frequently occurring in the accessory genome.}, } @article {pmid37679681, year = {2023}, author = {Wang, Y and Xu, X and Chen, H and Yang, F and Xu, B and Wang, K and Liu, Q and Liang, G and Zhang, R and Jiao, X and Zhang, Y}, title = {Assessment of beneficial effects and identification of host adaptation-associated genes of Ligilactobacillus salivarius isolated from badgers.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {530}, pmid = {37679681}, issn = {1471-2164}, support = {PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority Academic Program Development of Jiangsu Higher Education Institutions/ ; PAPD//Priority 
Academic Program Development of Jiangsu Higher Education Institutions/ ; }, mesh = {Animals ; Humans ; *Ligilactobacillus salivarius ; Host Adaptation ; Phylogeny ; Chickens ; Acclimatization ; Animals, Domestic ; }, abstract = {BACKGROUND: Ligilactobacillus salivarius has been frequently isolated from the gut microbiota of humans and domesticated animals and has been studied as a candidate probiotic. Badger (Meles meles) is known as a "generalist" species that consumes complex foods and exhibits tolerance and resistance to certain pathogens, which can be partly attributed to the beneficial microbes such as L. salivarius in the gut microbiota. However, our understanding of the beneficial traits and genomic features of badger-originated L. salivarius remains elusive.

RESULTS: In this study, nine L. salivarius strains were isolated from wild badgers' feces, one of which exhibited good probiotic properties. Complete genomes of the nine L. salivarius strains were generated, and comparative genomic analysis was performed with the publicly available complete genomes of L. salivarius obtained from humans and domesticated animals. The strains originating from badgers harbored a larger genome, a higher number of protein-coding sequences, and more functionally annotated genes than those originating from humans and chickens. The pan-genome phylogenetic tree demonstrated that the strains originating from badgers formed a separate clade, and a total of 412 gene families (12.6% of the total gene families in the pan-genome) were identified as genes gained by the last common ancestor of the badger group. The badger group harbored significantly more gene families responsible for the degradation of complex carbohydrate substrates and production of polysaccharides than strains from other hosts; many of these were acquired by gene gain events.

CONCLUSIONS: A candidate probiotic and nine L. salivarius complete genomes were obtained from the badgers' gut microbiome, and several beneficial genes were identified to be specifically present in the badger-originated strains that were gained in the evolution. Our study provides novel insights into the adaptation of L. salivarius to the intestinal habitat of wild badgers and provides valuable strain and genome resources for the development of L. salivarius as a probiotic.}, } @article {pmid37679363, year = {2023}, author = {Liu, F and Zhao, J and Sun, H and Xiong, C and Sun, X and Wang, X and Wang, Z and Jarret, R and Wang, J and Tang, B and Xu, H and Hu, B and Suo, H and Yang, B and Ou, L and Li, X and Zhou, S and Yang, S and Liu, Z and Yuan, F and Pei, Z and Ma, Y and Dai, X and Wu, S and Fei, Z and Zou, X}, title = {Genomes of cultivated and wild Capsicum species provide insights into pepper domestication and population differentiation.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {5487}, pmid = {37679363}, issn = {2041-1723}, mesh = {*Capsicum/genetics ; Domestication ; Vegetables ; Fruit/genetics ; Crops, Agricultural/genetics ; Camphor ; Menthol ; *Piper nigrum ; }, abstract = {Pepper (Capsicum spp.) is one of the earliest cultivated crops and includes five domesticated species, C. annuum var. annuum, C. chinense, C. frutescens, C. baccatum var. pendulum and C. pubescens. Here, we report a pepper graph pan-genome and a genome variation map of 500 accessions from the five domesticated Capsicum species and close wild relatives. We identify highly differentiated genomic regions among the domesticated peppers that underlie their natural variations in flowering time, characteristic flavors, and unique resistances to biotic and abiotic stresses. Domestication sweeps detected in C. annuum var. annuum and C. baccatum var. 
pendulum are mostly different, and the common domestication traits, including fruit size, shape and pungency, are achieved mainly through the selection of distinct genomic regions between these two cultivated species. Introgressions from C. baccatum into C. chinense and C. frutescens are detected, including those providing genetic sources for various biotic and abiotic stress tolerances.}, } @article {pmid37676357, year = {2023}, author = {Huang, B and Yan, H and Sun, M and Jin, Y}, title = {Novel discovery in roles of structural variations and RWP-RK transcription factors in heat tolerance for pearl millet.}, journal = {Stress biology}, volume = {3}, number = {1}, pages = {12}, pmid = {37676357}, issn = {2731-0450}, abstract = {Global warming adversely affects crop production worldwide. Massive efforts have been undertaken to study mechanisms regulating heat tolerance in plants. However, the roles of structural variations (SVs) in heat stress tolerance remain unclear. In a recent article, Yan et al. (Nat Genet 1-12, 2023) constructed the first pan-genome of pearl millet (Pennisetum glaucum) and identified key SVs linked to genes involved in regulating plant tolerance to heat stress for an important crop with a superior ability to thrive in extremely hot and arid climates. Through multi-omics analyses integrating pan-genomics, comparative genomics, transcriptomics, population genetics and molecular biological technologies, they found RWP-RK transcription factors cooperating with endoplasmic reticulum-related genes play key roles in heat tolerance in pearl millet. 
The results in this paper provided novel insights to advance the understanding of the genetic and genomic basis of heat tolerance and an exceptional resource for molecular breeding to improve heat tolerance in pearl millet and other crops.}, } @article {pmid37676306, year = {2023}, author = {González-Gómez, JP and Lozano-Aguirre, LF and Medrano-Félix, JA and Chaidez, C and Gerba, CP and Betancourt, WQ and Castro-Del Campo, N}, title = {Evaluation of nuclear and mitochondrial phylogenetics for the subtyping of Cyclospora cayetanensis.}, journal = {Parasitology research}, volume = {}, number = {}, pages = {}, pmid = {37676306}, issn = {1432-1955}, abstract = {Cyclospora cayetanensis is an enteric coccidian parasite responsible for gastrointestinal disease transmitted through contaminated food and water. It has been documented in several countries, mostly with low-socioeconomic levels, although major outbreaks have hit developed countries. Detection methods based on oocyst morphology, staining, and molecular testing have been developed. However, the current MLST panel offers an opportunity for enhancement, as amplification of all molecular markers remains unfeasible in the majority of samples. This study aims to address this challenge by evaluating two approaches for analyzing the genetic diversity of C. cayetanensis and identifying reliable markers for subtyping: core homologous genes and mitochondrial genome analysis. A pangenome was constructed using 36 complete genomes of C. cayetanensis, and a haplotype network and phylogenetic analysis were conducted using 33 mitochondrial genomes. Through the analysis of the pangenome, 47 potential markers were identified, emphasizing the need for more sequence data to achieve comprehensive characterization. Additionally, the analysis of mitochondrial genomes revealed 19 single-nucleotide variations that can serve as characteristic markers for subtyping this parasite. 
These findings not only contribute to the selection of molecular markers for C. cayetanensis subtyping, but they also drive the knowledge toward the potential development of a comprehensive genotyping method for this parasite.}, } @article {pmid37671027, year = {2023}, author = {Lee, H and Greer, SU and Pavlichin, DS and Zhou, B and Urban, AE and Weissman, T and Ji, HP}, title = {Pan-conserved segment tags identify ultra-conserved sequences across assemblies in the human pangenome.}, journal = {Cell reports methods}, volume = {3}, number = {8}, pages = {100543}, pmid = {37671027}, issn = {2667-2375}, support = {K01 MH129758/MH/NIMH NIH HHS/United States ; }, mesh = {Humans ; Conserved Sequence ; Haploidy ; *Neoplasms, Squamous Cell ; Polymorphism, Genetic ; *Skin Neoplasms ; }, abstract = {The human pangenome, a new reference sequence, addresses many limitations of the current GRCh38 reference. The first release is based on 94 high-quality haploid assemblies from individuals with diverse backgrounds. We employed a k-mer indexing strategy for comparative analysis across multiple assemblies, including the pangenome reference, GRCh38, and CHM13, a telomere-to-telomere reference assembly. Our k-mer indexing approach enabled us to identify a valuable collection of universally conserved sequences across all assemblies, referred to as "pan-conserved segment tags" (PSTs). By examining intervals between these segments, we discerned highly conserved genomic segments and those with structurally related polymorphisms. We found 60,764 polymorphic intervals with unique geo-ethnic features in the pangenome reference. In this study, we utilized ultra-conserved sequences (PSTs) to forge a link between human pangenome assemblies and reference genomes. 
This methodology enables the examination of any sequence of interest within the pangenome, using the reference genome as a comparative framework.}, } @article {pmid37668148, year = {2023}, author = {Mentasti, M and David, S and Turton, J and Morgan, M and Turner, L and Westlake, J and Jenkins, J and Williams, C and Rey, S and Watkins, J and Daniel, V and Mitchell, S and Forbes, G and Wootton, M and Jones, L}, title = {Clonal expansion and rapid characterization of Klebsiella pneumoniae ST1788, an otherwise uncommon strain spreading in Wales, UK.}, journal = {Microbial genomics}, volume = {9}, number = {9}, pages = {}, doi = {10.1099/mgen.0.001104}, pmid = {37668148}, issn = {2057-5858}, mesh = {Humans ; *Klebsiella pneumoniae/genetics ; Phylogeny ; Wales/epidemiology ; *Aminoglycosides ; Anti-Bacterial Agents ; }, abstract = {A multidrug-resistant strain of Klebsiella pneumoniae (Kp) sequence type (ST) 1788, an otherwise uncommon ST worldwide, was isolated from 65 patients at 11 hospitals and 11 general practices across South and West Wales, UK, between February 2019 and November 2021. A collection of 97 Kp ST1788 isolates (including 94 from Wales) was analysed to investigate the diversity and spread across Wales and to identify molecular marker(s) to aid development of a strain-specific real-time PCR. Whole genome sequencing (WGS) was performed with Illumina technology and the data were used to perform phylogenetic analyses. Pan-genome analysis of further Kp genome collections was used to identify an ST1788-specific gene target; a real-time PCR was then validated against a panel of 314 strains and 218 broth-enriched screening samples. Low genomic diversity was demonstrated amongst the 94 isolates from Wales. Evidence of spread within and across healthcare facilities was found. 
A yersiniabactin locus and the KL2 capsular locus were identified in 85/94 (90.4 %) and 94/94 (100 %) genomes respectively; bla SHV-232, bla TEM-1, bla CTX-M-15 and bla OXA-1 were simultaneously carried by 86/94 (91.5 %) isolates; 4/94 (4.3 %) isolates also carried bla OXA-48 carbapenemase. Aminoglycoside and fluoroquinolone resistance markers were found in 94/94 (100 %) and 86/94 (91.5 %) isolates respectively. The ST1788-specific real-time PCR was 100 % sensitive and specific. Our analyses demonstrated recent clonal expansion and spread of Kp ST1788 in the community and across healthcare facilities in South and West Wales with isolates carrying well-defined antimicrobial resistance and virulence markers. An ST1788-specific marker was also identified, enabling rapid and reliable preliminary characterization of isolates by real-time PCR. This study confirms the utility of WGS in investigating novel strains and in aiding proactive implementation of molecular tools to assist infection control specialists.}, } @article {pmid37667515, year = {2023}, author = {Baker, JL}, title = {Illuminating the oral microbiome and its host interactions: recent advancements in omics and bioinformatics technologies in the context of oral microbiome research.}, journal = {FEMS microbiology reviews}, volume = {47}, number = {5}, pages = {}, pmid = {37667515}, issn = {1574-6976}, support = {K99 DE029228/DE/NIDCR NIH HHS/United States ; }, mesh = {Humans ; *Quality of Life ; Computational Biology ; Genomics ; Metabolomics ; *Microbiota/genetics ; }, abstract = {The oral microbiota has an enormous impact on human health, with oral dysbiosis now linked to many oral and systemic diseases. Recent advancements in sequencing, mass spectrometry, bioinformatics, computational biology, and machine learning are revolutionizing oral microbiome research, enabling analysis at an unprecedented scale and level of resolution using omics approaches. 
This review contains a comprehensive perspective of the current state-of-the-art tools available to perform genomics, metagenomics, phylogenomics, pangenomics, transcriptomics, proteomics, metabolomics, lipidomics, and multi-omics analysis on (all) microbiomes, and then provides examples of how the techniques have been applied to research of the oral microbiome, specifically. Key findings of these studies and remaining challenges for the field are highlighted. Although the methods discussed here are placed in the context of their contributions to oral microbiome research specifically, they are pertinent to the study of any microbiome, and the intended audience of this review includes researchers who would simply like to get an introduction to microbial omics and/or an update on the latest omics methods. Continued research of the oral microbiota using omics approaches is crucial and will lead to dramatic improvements in human health, longevity, and quality of life.}, } @article {pmid37662009, year = {2023}, author = {Li, Z and Zhou, X and Liao, D and Liu, R and Zhao, X and Wang, J and Zhong, Q and Zeng, Z and Peng, Y and Tan, Y and Yang, Z}, title = {Comparative genomics and DNA methylation analysis of Pseudomonas aeruginosa clinical isolate PA3 by single-molecule real-time sequencing reveals new targets for antimicrobials.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1180194}, pmid = {37662009}, issn = {2235-2988}, mesh = {*Pseudomonas aeruginosa/genetics ; DNA Methylation ; Phylogeny ; Genomics ; *Anti-Infective Agents ; DNA ; }, abstract = {INTRODUCTION: Pseudomonas aeruginosa (P. aeruginosa) is an important opportunistic pathogen with broad environmental adaptability and complex drug resistance. Single-molecule real-time (SMRT) sequencing technique has longer read-length sequences, more accuracy, and the ability to identify epigenetic DNA alterations.

METHODS: This study applied SMRT technology to sequence a clinical strain P. aeruginosa PA3 to obtain its genome sequence and methylation modification information. Genomic, comparative, pan-genomic, and epigenetic analyses of PA3 were conducted.

RESULTS: General genome annotations of PA3 were discovered, as well as information about virulence factors, regulatory proteins (RPs), secreted proteins, type II toxin-antitoxin (TA) pairs, and genomic islands. A genome-wide comparison revealed that PA3 was comparable to other P. aeruginosa strains in terms of identity, but varied in areas of horizontal gene transfer (HGT). Phylogenetic analysis showed that PA3 was closely related to P. aeruginosa 60503 and P. aeruginosa 8380. P. aeruginosa's pan-genome consists of a core genome of roughly 4,300 genes and an accessory genome of at least 5,500 genes. The results of the epigenetic analysis identified one main methylation site, N6-methyladenosine (m6A), and 1 motif (CATNNNNNNNTCCT/AGGANNNNNNNATG). 16 meaningful methylated sites were picked. Among these, purH, phaZ, and lexA are of great significance playing an important role in the drug resistance and biological environment adaptability of PA3, and the targeting of these genes may benefit further antibacterial studies.

DISCUSSION: This study provided a detailed visualization and DNA methylation information of the PA3 genome and set a foundation for subsequent research into the molecular mechanism of DNA methyltransferase-controlled P. aeruginosa pathogenicity.}, } @article {pmid37659733, year = {2023}, author = {Sharma, N and Raman, H and Wheeler, D and Kalenahalli, Y and Sharma, R}, title = {Data-driven approaches to improve water-use efficiency and drought resistance in crop plants.}, journal = {Plant science : an international journal of experimental plant biology}, volume = {336}, number = {}, pages = {111852}, doi = {10.1016/j.plantsci.2023.111852}, pmid = {37659733}, issn = {1873-2259}, abstract = {With the increasing population, there lies a pressing demand for food, feed and fibre, while the changing climatic conditions pose severe challenges for agricultural production worldwide. Water is the lifeline for crop production; thus, enhancing crop water-use efficiency (WUE) and improving drought resistance in crop varieties are crucial for overcoming these challenges. Genetically-driven improvements in yield, WUE and drought tolerance traits can buffer the worst effects of climate change on crop production in dry areas. While traditional crop breeding approaches have delivered impressive results in increasing yield, the methods remain time-consuming and are often limited by the existing allelic variation present in the germplasm. Significant advances in breeding and high-throughput omics technologies in parallel with smart agriculture practices have created avenues to dramatically speed up the process of trait improvement by leveraging the vast volumes of genomic and phenotypic data. For example, individual genome and pan-genome assemblies, along with transcriptomic, metabolomic and proteomic data from germplasm collections, characterised at phenotypic levels, could be utilised to identify marker-trait associations and superior haplotypes for crop genetic improvement. 
In addition, these omics approaches enable the identification of genes involved in pathways leading to the expression of a trait, thereby providing an understanding of the genetic, physiological and biochemical basis of trait variation. These data-driven gene discoveries and validation approaches are essential for crop improvement pipelines, including genomic breeding, speed breeding and gene editing. Herein, we provide an overview of prospects presented using big data-driven approaches (including artificial intelligence and machine learning) to harness new genetic gains for breeding programs and develop drought-tolerant crop varieties with favourable WUE and high-yield potential traits.}, } @article {pmid37655941, year = {2023}, author = {Meyer, S and Laval, L and Pimenta, M and González-Flores, Y and Gaschet, M and Couvé-Deacon, E and Barraud, O and Dagot, C and Ploy, MC}, title = {[Tracking transfers of resistance-carrying bacteria between animals, humans and the environment].}, journal = {Comptes rendus biologies}, volume = {}, number = {}, pages = {}, doi = {10.5802/crbiol.114}, pmid = {37655941}, issn = {1768-3238}, abstract = {The fight against antibiotic resistance must incorporate the "One Health" concept to be effective. This means having a holistic approach embracing the different ecosystems, human, animal, and environment. Transfers of resistance genes may exist between these three domains and different stresses related to the exposome may influence these transfers. 
Various targeted or pan-genomic molecular biology techniques can be used to better characterise the dissemination of bacterial clones and to identify exchanges of genes and mobile genetic elements between ecosystems.}, } @article {pmid37653687, year = {2023}, author = {Dixon, TA and Walker, RSK and Pretorius, IS}, title = {Visioning synthetic futures for yeast research within the context of current global techno-political trends.}, journal = {Yeast (Chichester, England)}, volume = {}, number = {}, pages = {}, doi = {10.1002/yea.3897}, pmid = {37653687}, issn = {1097-0061}, support = {//Australian Research Council/ ; }, abstract = {Yeast research is entering into a new period of scholarship, with new scientific tools, new questions to ask and new issues to consider. The politics of emerging and critical technology can no longer be separated from the pursuit of basic science in fields, such as synthetic biology and engineering biology. Given the intensifying race for technological leadership, yeast research is likely to attract significant investment from government, and that it offers huge opportunities to the curious minded from a basic research standpoint. This article provides an overview of new directions in yeast research with a focus on Saccharomyces cerevisiae, and places these trends in their geopolitical context. At the highest level, yeast research is situated within the ongoing convergence of the life sciences with the information sciences. This convergent effect is most strongly pronounced in areas of AI-enabled tools for the life sciences, and the creation of synthetic genomes, minimal genomes, pan-genomes, neochromosomes and metagenomes using computer-assisted design tools and methodologies. Synthetic yeast futures encompass basic and applied science questions that will be of intense interest to government and nongovernment funding sources. 
It is essential for the yeast research community to map and understand the context of their research to ensure their collaborations turn global challenges into research opportunities.}, } @article {pmid37646934, year = {2023}, author = {Bayer, PE and Edwards, D}, title = {Investigating Pangenome Graphs Using Wheat Panache.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2703}, number = {}, pages = {23-29}, pmid = {37646934}, issn = {1940-6029}, mesh = {Humans ; *Triticum/genetics ; *Agriculture ; Genomics ; Research Personnel ; Workflow ; }, abstract = {Pangenome graphs quickly become the central data structure representing the diversity of variation we see across related genomes. Pangenome graphs have been published for some species, including plants of agronomic interest. However, visualizing these graphs is not easy as the graphs are large, and variants within these graphs are complex. Tools are needed to visualize graph data structures. Here, we present a workflow to search and visualize a wheat pangenome graph using Wheat Panache. The approach presented assists researchers interested in wheat genomics.}, } @article {pmid37645952, year = {2023}, author = {McLaughlin, M and Fiebig, A and Crosson, S}, title = {XRE Transcription Factors Conserved in Caulobacter and φCbK Modulate Adhesin Development and Phage Production.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37645952}, support = {F32 GM141017/GM/NIGMS NIH HHS/United States ; R35 GM131762/GM/NIGMS NIH HHS/United States ; }, abstract = {Upon infection, transcriptional shifts in both a host bacterium and its invading phage determine host and viral fitness. The xenobiotic response element (XRE) family of transcription factors (TFs), which are commonly encoded by bacteria and phages, regulate diverse features of bacterial cell physiology and impact phage infection dynamics. 
Through a pangenome analysis of Caulobacter species isolated from soil and aquatic ecosystems, we uncovered an apparent radiation of a paralogous XRE TF gene cluster, several of which have established functions in the regulation of holdfast adhesin development and biofilm formation in C. crescentus. We further discovered related XRE TFs across the class Alphaproteobacteria and its phages, including the φCbK Caulophage, suggesting that members of this gene cluster impact host-phage interactions. Here we show that a closely related group of XRE proteins, encoded by both C. crescentus and φCbK, can form heteromeric associations and control the transcription of a common gene set, influencing processes including holdfast development and the production of φCbK virions. The φCbK XRE paralog, tgrL, is highly expressed at the earliest stages of infection and can directly repress transcription of hfiA, a potent holdfast inhibitor, and gafYZ, a transcriptional activator of prophage-like gene transfer agents (GTAs) encoded on the C. crescentus chromosome. XRE proteins encoded from the C. crescentus chromosome also directly repress gafYZ transcription, revealing a functionally redundant set of host regulators that may protect against spurious production of GTA particles and inadvertent cell lysis. Deleting host XRE transcription factors reduced φCbK burst size, while overexpressing these genes or φCbK tgrL rescued this burst defect. We conclude that an XRE TF gene cluster, shared by C. 
crescentus and φCbK, plays an important role in adhesion regulation under phage-free conditions, and influences host-phage dynamics during infection.}, } @article {pmid37645873, year = {2023}, author = {Shivakumar, VS and Ahmed, OY and Kovaka, S and Zakeri, M and Langmead, B}, title = {Sigmoni: classification of nanopore signal with a compressed pangenome index.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37645873}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; U01 CA253481/CA/NCI NIH HHS/United States ; }, abstract = {Improvements in nanopore sequencing necessitate efficient classification methods, including pre-filtering and adaptive sampling algorithms that enrich for reads of interest. Signal-based approaches circumvent the computational bottleneck of basecalling. But past methods for signal-based classification do not scale efficiently to large, repetitive references like pangenomes, limiting their utility to partial references or individual genomes. We introduce Sigmoni: a rapid, multiclass classification method based on the r-index that scales to references of hundreds of Gbps. Sigmoni quantizes nanopore signal into a discrete alphabet of picoamp ranges. It performs rapid, approximate matching using matching statistics, classifying reads based on distributions of picoamp matching statistics and co-linearity statistics. Sigmoni is 10-100X faster than previous methods for adaptive sampling in host depletion experiments with improved accuracy, and can query reads against large microbial or human pangenomes.}, } @article {pmid37644736, year = {2023}, author = {Le, VV and Ko, SR and Kang, M and Jeong, S and Oh, HM and Ahn, CY}, title = {Comparative Genome analysis of the Genus Curvibacter and the Description of Curvibacter microcysteis sp. nov. and Curvibacter cyanobacteriorum sp. 
nov., Isolated from Fresh Water during the Cyanobacterial Bloom Period.}, journal = {Journal of microbiology and biotechnology}, volume = {33}, number = {11}, pages = {1-10}, doi = {10.4014/jmb.2306.06017}, pmid = {37644736}, issn = {1738-8872}, abstract = {The three Gram-negative, catalase- and oxidase-positive bacterial strains RS43[T], HBC28, and HBC61[T], were isolated from fresh water and subjected to a polyphasic study. Comparison of 16S rRNA gene sequence initially indicated that strains RS43[T], HBC28, and HBC61[T] were closely related to species of genus Curvibacter and shared the highest sequence similarity of 98.14%, 98.21%, and 98.76%, respectively, with Curvibacter gracilis 7-1[T]. Phylogenetic analysis based on genome sequences placed all strains within the genus Curvibacter. The average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) values between the three strains and related type strains supported their recognition as two novel genospecies in the genus Curvibacter. Comparative genomic analysis revealed that the genus possessed an open pangenome. Based on KEGG BlastKOALA analyses, Curvibacter species have the potential to metabolize benzoate, phenylacetate, catechol, and salicylate, indicating their potential use in the elimination of these compounds from the water systems. The results of polyphasic characterization indicated that strains RS43[T] and HBC61[T] represent two novel species, for which the name Curvibacter microcysteis sp. nov. (type strain RS43[T] =KCTC 92793[T] =LMG 32714[T]) and Curvibacter cyanobacteriorum sp. nov. 
(type strain HBC61[T] =KCTC 92794[T] =LMG 32713[T]) are proposed.}, } @article {pmid37639729, year = {2023}, author = {Prajapati, A and Yogisharadhya, R and Mohanty, NN and Mendem, SK and Chanda, MM and Siddaramappa, S and Shivachandra, SB}, title = {Comparative genome analysis of Pasteurella multocida strains of porcine origin.}, journal = {Genome}, volume = {}, number = {}, pages = {}, doi = {10.1139/gen-2023-0021}, pmid = {37639729}, issn = {1480-3321}, abstract = {Pasteurella multocida causes acute/chronic pasteurellosis in porcine resulting in considerable economic losses globally. The draft genomes of two Indian strains NIVEDIPm17 (serogroup D) and NIVEDIPm36 (serogroup A) were sequenced. A total of 2182-2284 coding sequences (CDSs) were predicted along with 5-6 rRNA and 45-46 tRNA genes in the genomes. Multi locus sequence analysis and LPS genotyping showed the presence of ST50: genotype 07 and ST74: genotype 06 in NIVEDIPm17 and NIVEDIPm36, respectively. Pangenome analysis of 61 strains showed the presence of 1653 core genes, 167 soft core genes, 750 shell genes, and 1820 cloud genes. Analysis of virulence-associated genes in 61 genomes indicated the presence of nanB, exbB, exbD, ptfA, ompA, ompH, fur, plpB, fimA, sodA, sodC, tonB, and omp87 in all strains. The 61 genomes contained genes encoding tetracycline (54%), streptomycin (48%), sulphonamide (28%), tigecycline (25%), chloramphenicol (21%), amikacin (7%), cephalosporin (5%) and trimethoprim (5%) resistance. MLST revealed that ST50 was the most common (34%), followed by ST74 (26%), ST13 (24%), ST287 (5%), ST09 (5%), ST122 (3%), and ST07 (2%). SNP and core genome-based phylogenetic analysis clustered the strains into 3 major clusters. In conclusion, we described the various virulence factors, mobile genetic elements and antimicrobial resistance genes in pangenome of P. 
multocida of porcine origin besides a rare presence of LPS genotype 7 in serogroup D.}, } @article {pmid37636268, year = {2023}, author = {Yang, Z and Guarracino, A and Biggs, PJ and Black, MA and Ismail, N and Wold, JR and Merriman, TR and Prins, P and Garrison, E and de Ligt, J}, title = {Pangenome graphs in infectious disease: a comprehensive genetic variation analysis of Neisseria meningitidis leveraging Oxford Nanopore long reads.}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1225248}, pmid = {37636268}, issn = {1664-8021}, abstract = {Whole genome sequencing has revolutionized infectious disease surveillance for tracking and monitoring the spread and evolution of pathogens. However, using a linear reference genome for genomic analyses may introduce biases, especially when studies are conducted on highly variable bacterial genomes of the same species. Pangenome graphs provide an efficient model for representing and analyzing multiple genomes and their variants as a graph structure that includes all types of variations. In this study, we present a practical bioinformatics pipeline that employs the PanGenome Graph Builder and the Variation Graph toolkit to build pangenomes from assembled genomes, align whole genome sequencing data and call variants against a graph reference. The pangenome graph enables the identification of structural variants, rearrangements, and small variants (e.g., single nucleotide polymorphisms and insertions/deletions) simultaneously. We demonstrate that using a pangenome graph, instead of a single linear reference genome, improves mapping rates and variant calling for both simulated and real datasets of the pathogen Neisseria meningitidis. Overall, pangenome graphs offer a promising approach for comparative genomics and comprehensive genetic variation analysis in infectious disease. 
Moreover, this innovative pipeline, leveraging pangenome graphs, can bridge variant analysis, genome assembly, population genetics, and evolutionary biology, expanding the reach of genomic understanding and applications.}, } @article {pmid37630674, year = {2023}, author = {Aguirre-Sánchez, JR and Quiñones, B and Ortiz-Muñoz, JA and Prieto-Alvarado, R and Vega-López, IF and Martínez-Urtaza, J and Lee, BG and Chaidez, C}, title = {Comparative Genomic Analyses of Virulence and Antimicrobial Resistance in Citrobacter werkmanii, an Emerging Opportunistic Pathogen.}, journal = {Microorganisms}, volume = {11}, number = {8}, pages = {}, pmid = {37630674}, issn = {2076-2607}, support = {CRIS Project Number 2030-42000-055-00D//United States Department of Agriculture (USDA), Agricultural Research Service (ARS)/ ; Laboratorio Nacional para la Investigación en Inocuidad Alimentaria (LANIIA)//Centro de Investigación y Desarrollo A. C. (CIAD) in Culiacán, Sinaloa/ ; }, abstract = {Citrobacter werkmanii is an emerging and opportunistic human pathogen found in developing countries and is a causative agent of wound, urinary tract, and blood infections. The present study conducted comparative genomic analyses of a C. werkmanii strain collection from diverse geographical locations and sources to identify the relevant virulence and antimicrobial resistance genes. Pangenome analyses divided the examined C. werkmanii strains into five distinct clades; the subsequent classification identified genes with functional roles in carbohydrate and general metabolism for the core genome and genes with a role in secretion, adherence, and the mobilome for the shell and cloud genomes. A maximum-likelihood phylogenetic tree with a heatmap, showing the virulence and antimicrobial genes' presence or absence, demonstrated the presence of genes with functional roles in secretion systems, adherence, enterobactin, and siderophore among the strains belonging to the different clades. C. 
werkmanii strains in clade V, predominantly from clinical sources, harbored genes implicated in type II and type Vb secretion systems as well as multidrug resistance to aminoglycoside, beta-lactamase, fluoroquinolone, phenicol, trimethoprim, macrolides, sulfonamide, and tetracycline. In summary, these comparative genomic analyses have demonstrated highly pathogenic and multidrug-resistant genetic profiles in C. werkmanii strains, indicating a virulence potential for this commensal and opportunistic human pathogen.}, } @article {pmid37630640, year = {2023}, author = {van der Lee, TAJ and van Gent-Pelzer, MPE and Jonkheer, EM and Brankovics, B and Houwers, IM and van der Wolf, JM and Bonants, PJM and van Duivenbode, I and Vreeburg, RAM and Nas, M and Smit, S}, title = {An Efficient Triplex TaqMan Quantitative PCR to Detect a Blackleg-Causing Lineage of Pectobacterium brasiliense in Potato Based on a Pangenome Analysis.}, journal = {Microorganisms}, volume = {11}, number = {8}, pages = {}, pmid = {37630640}, issn = {2076-2607}, support = {TU-16022//Dutch Ministry of Agriculture, Nature and Food Safety/ ; }, abstract = {P. brasiliense is an important bacterial pathogen causing blackleg (BL) in potatoes. Nevertheless, P. brasiliense is often detected in seed lots that do not develop any of the typical blackleg symptoms in the potato crop when planted. Field bioassays identified that P. brasiliense strains can be categorized into two distinct classes, some able to cause blackleg symptoms and some unable to do it. A comparative pangenomic approach was performed on 116 P. brasiliense strains, of which 15 were characterized as BL-causing strains and 25 as non-causative. In a genetically homogeneous clade comprising all BL-causing P. brasiliense strains, two genes only present in the BL-causing strains were identified, one encoding a predicted lysozyme inhibitor Lprl (LZI) and one encoding a putative Toll/interleukin-1 receptor (TIR) domain-containing protein. 
TaqMan assays for the specific detection of BL-causing P. brasiliense were developed and integrated with the previously developed generic P. brasiliense assay into a triplex TaqMan assay. This simultaneous detection makes the scoring more efficient as only a single tube is needed, and it is more robust as BL-causing strains of P. brasiliense should be positive for all three assays. Individual P. brasiliense strains were found to be either positive for all three assays or only for the P. brasiliense assay. In potato samples, the mixed presence of BL-causing and not BL-causing P. brasiliense strains was observed as shown by the difference in Ct value of the TaqMan assays. However, upon extension of the number of strains, it became clear that in recent years additional BL-causing lineages of P. brasiliense were detected for which additional assays must be developed.}, } @article {pmid37630590, year = {2023}, author = {Mevada, V and Patel, R and Dudhagara, P and Chaudhari, R and Vohra, M and Khan, V and J H Shyu, D and Chen, YY and Zala, D}, title = {Whole Genome Sequencing and Pan-Genomic Analysis of Multidrug-Resistant Vibrio cholerae VC01 Isolated from a Clinical Sample.}, journal = {Microorganisms}, volume = {11}, number = {8}, pages = {}, pmid = {37630590}, issn = {2076-2607}, abstract = {Cholera, a disease caused by the Vibrio cholerae bacteria, threatens public health worldwide. The organism mentioned above has a significant historical record of being identified as a prominent aquatic environmental pollutant capable of adapting its phenotypic and genotypic traits to react to host patients effectively. This study aims to elucidate the heterogeneity of the sporadic clinical strain of V. cholerae VC01 among patients residing in Silvasa. The study involved conducting whole-genome sequencing of the isolate obtained from patients exhibiting symptoms, including those not commonly observed in clinical practice. 
The strain was initially identified through a combination of biochemical analysis, microscopy, and 16s rRNA-based identification, followed by type strain-based identification. The investigation demonstrated the existence of various genetic alterations and resistance profiles against multiple drugs, particularly chloramphenicol (catB9), florfenicol (floR), oxytetracycline (tet(34)), sulfonamide (sul2), and Trimethoprim (dfrA1). The pan-genomic analysis indicated that 1099 distinct clusters were detected within the genome sequences of recent isolates worldwide. The present study helps to establish a correlation between the mutation and the coexistence of antimicrobial resistance toward current treatment.}, } @article {pmid37628823, year = {2023}, author = {Li, H and Song, K and Zhang, X and Wang, D and Dong, S and Liu, Y and Yang, L}, title = {Application of Multi-Perspectives in Tea Breeding and the Main Directions.}, journal = {International journal of molecular sciences}, volume = {24}, number = {16}, pages = {}, pmid = {37628823}, issn = {1422-0067}, support = {SDAIT-25-01//The Foundation of Innovation Team Project for Modern Agricultural Industrious Technology System of Shandong Province/ ; YDZX2022123//Special Funds for Local Scientific and Technological Development Guided by the Central Government/ ; }, mesh = {*Plant Breeding ; *Camellia sinensis/genetics ; Crops, Agricultural ; Cytoplasm ; Tea ; }, abstract = {Tea plants are an economically important crop and conducting research on tea breeding contributes to enhancing the yield and quality of tea leaves as well as breeding traits that satisfy the requirements of the public. This study reviews the current status of tea plants germplasm resources and their utilization, which has provided genetic material for the application of multi-omics, including genomics and transcriptomics in breeding. 
Various molecular markers for breeding were designed based on multi-omics, and available approaches in the direction of high yield, quality and resistance in tea plants breeding are proposed. Additionally, future breeding of tea plants based on single-cellomics, pangenomics, plant-microbe interactions and epigenetics are proposed and provided as references. This study aims to provide inspiration and guidance for advancing the development of genetic breeding in tea plants, as well as providing implications for breeding research in other crops.}, } @article {pmid37623951, year = {2023}, author = {Pitta, JLLP and Bezerra, MF and Fernandes, DLRDS and Block, T and Novaes, AS and Almeida, AMP and Rezende, AM}, title = {Genomic Analysis of Yersinia pestis Strains from Brazil: Search for Virulence Factors and Association with Epidemiological Data.}, journal = {Pathogens (Basel, Switzerland)}, volume = {12}, number = {8}, pages = {}, pmid = {37623951}, issn = {2076-0817}, abstract = {Yersinia pestis, the etiological agent of the plague, is considered a genetically homogeneous species. Brazil is currently in a period of epidemiological silence but plague antibodies are still detected in sentinel animals, suggesting disease activity in the sylvatic cycle. The present study deployed an in silico approach to analyze virulence factors among 407 Brazilian genomes of Y. pestis belonging to the Fiocruz Collection (1966-1997). The pangenome analysis associated several known virulence factors of Y. pestis in clades according to the presence or absence of genes. Four main strain clades (C, E, G, and H) exhibited the absence of various virulence genes. Notably, clade G displayed the highest number of absent genes, while clade E showed a significant absence of genes related to the T6SS secretion system and clade H predominantly demonstrated the absence of plasmid-related genes. These results suggest attenuation of virulence in these strains over time. 
The cgMLST analysis associated genomic and epidemiological data highlighting evolutionary patterns related to the isolation years and outbreaks of Y. pestis in Brazil. Thus, the results contribute to the understanding of the genetic diversity and virulence within Y. pestis and the potential for utilizing genomic data in epidemiological investigations.}, } @article {pmid37620118, year = {2023}, author = {Horsfield, ST and Tonkin-Hill, G and Croucher, NJ and Lees, JA}, title = {Accurate and fast graph-based pangenome annotation and clustering with ggCaller.}, journal = {Genome research}, volume = {}, number = {}, pages = {}, doi = {10.1101/gr.277733.123}, pmid = {37620118}, issn = {1549-5469}, abstract = {Bacterial genomes differ in both gene content and sequence mutations, which underlies extensive phenotypic diversity, including variation in susceptibility to antimicrobials or vaccine-induced immunity. To identify and quantify important variants, all genes within a population must be predicted, functionally annotated and clustered, representing the pangenome. Despite the volume of genome data available, gene prediction and annotation are currently conducted in isolation on individual genomes, which is computationally inefficient and frequently inconsistent across genomes. Here, we introduce the open-source software graph-gene-caller (ggCaller). ggCaller combines gene prediction, functional annotation, and clustering into a single workflow using population-wide de Bruijn Graphs, removing redundancy in gene annotation, and resulting in more accurate gene predictions and orthologue clustering. We applied ggCaller to simulated and real-world bacterial datasets containing hundreds or thousands of genomes, comparing it to current state-of-the-art tools. ggCaller has considerable speed-ups with equivalent or greater accuracy, particularly with datasets containing complex sources of error, such as assembly contamination or fragmentation. 
ggCaller is also an important extension to bacterial genome-wide association studies, enabling querying of annotated graphs for functional analyses. We highlight this application by functionally annotating DNA sequences with significant associations to tetracycline and macrolide resistance in Streptococcus pneumoniae, identifying key resistance determinants that were missed when using only a single reference genome. ggCaller is a novel bacterial genome analysis tool with applications in bacterial evolution and epidemiology.}, } @article {pmid37612339, year = {2023}, author = {Jang, J and Jung, J and Lee, YH and Lee, S and Baik, M and Kim, H}, title = {Chromosome-level genome assembly of Korean native cattle and pangenome graph of 14 Bos taurus assemblies.}, journal = {Scientific data}, volume = {10}, number = {1}, pages = {560}, pmid = {37612339}, issn = {2052-4463}, support = {NRF-2021R1A2C2094111//National Research Foundation of Korea (NRF)/ ; }, mesh = {Animals ; *Cattle/genetics ; Humans ; *Asian People ; Chromosomes/genetics ; Republic of Korea ; *Tandem Repeat Sequences ; *Genome ; }, abstract = {This study presents the first chromosome-level genome assembly of Hanwoo, an indigenous Korean breed of Bos taurus taurus. This is the first genome assembly of Asian taurus breed. Also, we constructed a pangenome graph of 14 B. taurus genome assemblies. The contig N50 was over 55 Mb, the scaffold N50 was over 89 Mb and a genome completeness of 95.8%, as estimated by BUSCO using the mammalian set, indicated a high-quality assembly. 48.7% of the genome comprised various repetitive elements, including DNAs, tandem repeats, long interspersed nuclear elements, and simple repeats. A total of 27,314 protein-coding genes were identified, including 25,302 proteins with inferred gene names and 2,012 unknown proteins. The pangenome graph of 14 B. taurus autosomes revealed 528.47 Mb non-reference regions in total and 61.87 Mb Hanwoo-specific regions. 
Our Hanwoo assembly and pangenome graph provide valuable resources for studying B. taurus populations.}, } @article {pmid37610465, year = {2023}, author = {Szuhaj, M and Kakuk, B and Wirth, R and Rákhely, G and Kovács, KL and Bagi, Z}, title = {Regulation of the methanogenesis pathways by hydrogen at transcriptomic level in time.}, journal = {Applied microbiology and biotechnology}, volume = {}, number = {}, pages = {}, pmid = {37610465}, issn = {1432-0614}, support = {2020-3.1.2-ZFR-KVG-2020-00009//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; K143198//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; FK123902//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; 2019-2.1.13-TÉT_IN-2020-00016//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; PD 132145//Nemzeti Kutatási Fejlesztési és Innovációs Hivatal/ ; }, abstract = {The biomethane formation from 4 H2 + CO2 by pure cultures of two methanogens, Methanocaldococcus fervens and Methanobacterium thermophilum, has been studied. The goal of the study was to understand the regulation of the enzymatic steps associated with biomethane biosynthesis by H2, using metagenomic, pan-genomic, and transcriptomic approaches. Methanogenesis in the autotrophic methanogen M. fervens could be easily "switched off" and "switched on" by H2/CO2 within about an hour. In contrast, the heterotrophic methanogen M. thermophilum was practically insensitive to the addition of the H2/CO2 trigger although this methanogen also converted H2/CO2 to CH4. From practical points of view, the regulatory function of H2/CO2 suggests that in the power-to-gas (P2G) renewable excess electricity conversion and storage systems, the composition of the biomethane-generating methanogenic community is essential for sustainable operation. In addition to managing the specific hydrogenotrophic methanogenesis biochemistry, H2/CO2 affected several, apparently unrelated, metabolic pathways. 
The redox-regulated overall biochemistry and symbiotic relationships in the methanogenic communities should be explored in order to make the P2G technology more efficient. KEY POINTS : • Hydrogenotrophic methanogens may respond distinctly to H2/CO2 in bio-CH4 formation. • H2/CO2 can also activate metabolic routes, which are apparently unrelated to methanogenesis. • Sustainable conversion of the fluctuating renewable electricity to bio-CH4 is an option.}, } @article {pmid37599459, year = {2023}, author = {Alsaiari, AA and Hakami, MA and Alotaibi, BS and Alkhalil, SS and Alkhorayef, N and Khan, K and Jalal, K}, title = {Delineating multi-epitopes vaccine designing from membrane protein CL5 against all monkeypox strains: a pangenome reverse vaccinology approach.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-22}, doi = {10.1080/07391102.2023.2248301}, pmid = {37599459}, issn = {1538-0254}, abstract = {The recently identified monkeypox virus (MPXV or mpox) is a zoonotic orthopox virus that infects humans and causes diseases with traits like smallpox. The world health organization (WHO) estimates that 3-6% of MPXV cases result in death. As it might impact everyone globally, like COVID, and become the next pandemic, the cure for this disease is important for global public health. The high incidence and disease ratio of MPXV necessitates immediate efforts to design a unique vaccine candidate capable of addressing MPXV diseases. Here, we used a computational pan-genome-based vaccine design strategy for all currently reported 19 MPXV strains acquired from different regions of the world. Thus, this study's objective was to develop a new and safe vaccine candidate against MPXV by targeting the membrane CL5 protein; identified after the pangenome analysis. Proteomics and reverse vaccinology have covered up all of the MPXV epitopes that would usually stimulate robust host immune responses. 
Following this, only two mapped (MHC-I, MHC-II, and B-cell) epitopes were observed to be extremely effective that can be used in the construction of CL5 protein vaccine candidates. The suggested vaccine (V5) candidate from eight vaccine models was shown to be antigenic, non-allergenic, and stable (with 213 amino acids). The vaccine's candidate efficacy was evaluated by using many in silico methods to predict, improve, and validate its 3D structure. Molecular docking and molecular dynamics simulations further reveal that the proposed vaccine candidate ensemble has a high interaction energy with the HLAs and TLR2/4 immunological receptors under study. Later, the vaccine sequence was used to generate an expression vector for the E. coli K12 strain. Further study uncovers that V5 was highly immunogenic because it produced robust primary, secondary, and tertiary immune responses. Eventually, the use of computer-aided vaccine designing may significantly reduce costs and speed up the process of developing vaccines. Although the results of this research are promising, more research (experimental; in vivo, and in vitro studies) is needed to verify the biological efficacy of the proposed vaccine against MPXV. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37596715, year = {2023}, author = {Sun, Y and Zheng, C and Zhou, J and Zhen, M and Wei, X and Yan, X and Guo, X and Zheng, L and Shao, M and Li, C and Qin, D and Zhang, J and Xiong, L and Xing, J and Huang, B and Dong, Z and Cheng, P and Yu, G}, title = {Pathogen Profile of Klebsiella variicola, the Causative Agent of Banana Sheath Rot.}, journal = {Plant disease}, volume = {107}, number = {8}, pages = {2325-2334}, doi = {10.1094/PDIS-09-22-2018-RE}, pmid = {37596715}, issn = {0191-2917}, mesh = {Animals ; Humans ; *Musa ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; China ; Klebsiella/genetics ; Endophytes ; }, abstract = {Banana (Musa spp.) is an important fruit and food crop worldwide. 
In recent years, banana sheath rot has become a major problem in banana cultivation, causing plant death and substantial economic losses. Nevertheless, the pathogen profile of this disease has not been fully characterized. Klebsiella variicola is a versatile bacterium capable of colonizing different hosts, such as plants, humans, insects, and animals, and is recognized as an emerging pathogen in various hosts. In this study, we obtained 12 bacterial isolates from 12 different banana samples showing banana sheath rot in Guangdong and Guangxi Provinces, China. Phylogenetic analysis based on 16S rRNA sequences confirmed that all 12 isolates were K. variicola strains. We sequenced the genomes of these strains, performed comparative genomic analysis with other sequenced K. variicola strains, and found a lack of consistency in accessory gene content among these K. variicola strains. However, prediction based on the pan-genome of K. variicola revealed 22 unique virulence factors carried by the 12 pathogenic K. variicola isolates. Microbiome and microbial interaction network analysis of endophytes between the healthy tissues of diseased plants and healthy plants of two cultivars showed that Methanobacterium negatively interacts with Klebsiella in banana plants and that Herbaspirillum might indirectly inhibit Methanobacterium to promote Klebsiella growth. These results suggest that banana sheath rot is caused by the imbalance of plant endophytes and opportunistic pathogenic bacteria, providing an important basis for research and control of this disease. Copyright © 2023 The Author(s). 
This is an open access article distributed under the CC BY-NC-ND 4.0 International license.}, } @article {pmid37596178, year = {2023}, author = {Mertz, P and Hentgen, V and Boursier, G and Delon, J and Georgin-Lavialle, S}, title = {[Monogenic auto-inflammatory diseases associated with actinopathies: A review of the literature].}, journal = {La Revue de medecine interne}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.revmed.2023.06.005}, pmid = {37596178}, issn = {1768-3122}, abstract = {Auto-inflammatory diseases (AIDs) are diseases resulting from an inappropriate activation of innate immunity in the absence of any infection. The field of monogenic AIDs is constantly expanding, with the discovery of new pathologies and pathophysiological mechanisms thanks to pangenomic sequencing. Actinopathies with auto-inflammatory manifestations are a new emerging group of AIDs, linked to defects in the regulation of the actin cytoskeleton dynamics. These diseases most often begin in the neonatal period and combine to varying degrees a more or less severe primary immune deficiency, cytopenias (especially thrombocytopenia), auto-inflammatory manifestations (especially cutaneous and digestive), atopic and auto-immune manifestations. The diagnosis is to be evoked essentially in front of a cutaneous-digestive auto-inflammation picture of early onset, associated with a primary immune deficiency and thrombocytopenia or a tendency to bleed. Some of these diseases have specificities, including a risk of macrophagic activation syndrome or a tendency to atopy or lymphoproliferation. We propose here a review of the literature on these new diseases, with a proposal for a practical approach according to the main associated biological abnormalities and some clinical particularities. However, the diagnosis remains genetic, and several differential diagnoses must be considered. 
The pathophysiology of these diseases is not yet fully elucidated, and studies are needed to better clarify the inherent mechanisms that can guide the choice of therapies. In most cases, the severity of the picture indicates allogeneic marrow transplantation.}, } @article {pmid37594286, year = {2023}, author = {Kim, M and Cha, IT and Lee, KE and Li, M and Park, SJ}, title = {Pangenome analysis provides insights into the genetic diversity, metabolic versatility, and evolution of the genus Flavobacterium.}, journal = {Microbiology spectrum}, volume = {}, number = {}, pages = {e0100323}, doi = {10.1128/spectrum.01003-23}, pmid = {37594286}, issn = {2165-0497}, abstract = {Members of the genus Flavobacterium are widely distributed and produce various polysaccharide-degrading enzymes. Many species in the genus have been isolated and characterized. However, few studies have focused on marine isolates or fish pathogens, and in-depth genomic analyses, particularly comparative analyses of isolates from different habitat types, are lacking. Here, we isolated 20 strains of the genus from various environments in South Korea and sequenced their full-length genomes. Combined with published sequence data, we examined genomic traits, evolution, environmental adaptation, and putative metabolic functions in total 187 genomes of isolated species in Flavobacterium categorized as marine, host-associated, and terrestrial including freshwater. A pangenome analysis revealed a correlation between genome size and coding or noncoding density. Flavobacterium spp. had high levels of diversity, allowing for novel gene repertories via recombination events. Defense-related genes only accounted for approximately 3% of predicted genes in all Flavobacterium genomes. 
While genes involved in metabolic pathways did not differ with respect to isolation source, there was substantial variation in genomic traits; in particular, the abundances of tRNAs and rRNAs were higher in the host-associated group than in other groups. One genome in the host-associated group contained a Microviridae prophage closely related to an enterobacteria phage. The proteorhodopsin gene was only identified in four terrestrial strains isolated for this study. Furthermore, recombination events clearly influenced genomic diversity and may contribute to the response to environmental stress. These findings shed light on the high genetic variation in Flavobacterium and functional roles in diverse ecosystems as a result of their metabolic versatility. IMPORTANCE The genus Flavobacterium is a diverse group of bacteria that are found in a variety of environments. While most species of this genus are harmless and utilize organic substrates such as proteins and polysaccharides, some members may play a significant role in the cycling for organic substances within their environments. Nevertheless, little is known about the genomic dynamics and/or metabolic capacity of Flavobacterium. Here, we found that Flavobacterium species may have an open pangenome, containing a variety of diverse and novel gene repertoires. Intriguingly, we discovered that one genome (classified into host-associated group) contained a Microviridae prophage closely related to that of enterobacteria. Proteorhodopsin may be expressed under conditions of light or oxygen pressure in some strains isolated for this study. 
Our findings significantly contribute to the understanding of the members of the genus Flavobacterium diversity exploration and will provide a framework for the way for future ecological characterizations.}, } @article {pmid37592233, year = {2023}, author = {Zhang, X and Xiao, L and Liu, J and Tian, Q and Xie, J}, title = {Trade-off in genome turnover events leading to adaptive evolution of Microcystis aeruginosa species complex.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {462}, pmid = {37592233}, issn = {1471-2164}, support = {32101368//National Natural Science Foundation of China/ ; 2022YFE0119600//National Key Research and Development Program of China/ ; }, mesh = {*Microcystis/genetics ; Genome-Wide Association Study ; *Bacteriophages ; Energy Metabolism ; Evolution, Molecular ; }, abstract = {BACKGROUND: Numerous studies in the past have expanded our understanding of the genetic differences of global distributed cyanobacteria that originated around billions of years ago, however, unraveling how gene gain and loss drive the genetic evolution of cyanobacterial species, and the trade-off of these evolutionary forces are still the central but poorly understood issues.

RESULTS: To delineate the contribution of gene flow in mediating the hereditary differentiation and shaping the microbial evolution, a global genome-wide study of bloom-forming cyanobacterium, Microcystis aeruginosa species complex, provided robust evidence for genetic diversity, reflected by enormous variation in gene repertoire among various strains. Mathematical extrapolation showed an 'open' microbial pan-genome of M. aeruginosa species, since novel genes were predicted to be introduced after new genomes were sequenced. Identification of numerous horizontal gene transfer signatures in genome regions of interest suggested that genome expansion via transformation and phage-mediated transduction across bacterial lineage as an evolutionary route may contribute to the differentiation of Microcystis functions (e.g., carbohydrate metabolism, amino acid metabolism, and energy metabolism). Meanwhile, the selective loss of some dispensable genes at the cost of metabolic versatility is a means of adaptive evolution that has the potential to increase the biological fitness.

CONCLUSIONS: Now that the recruitment of novel genes was accompanied by a parallel loss of some other ones, a trade-off in gene content may drive the divergent differentiation of M. aeruginosa genomes. Our study provides a genetic framework for the evolution of M. aeruginosa species and illustrates their possible evolutionary patterns.}, } @article {pmid37587248, year = {2023}, author = {Pei, Z and Li, X and Cui, S and Yang, B and Lu, W and Zhao, J and Mao, B and Chen, W}, title = {Population genomics of Lacticaseibacillus paracasei: pan-genome, integrated prophage, antibiotic resistance, and carbohydrate utilization.}, journal = {World journal of microbiology & biotechnology}, volume = {39}, number = {10}, pages = {280}, pmid = {37587248}, issn = {1573-0972}, support = {32172173//National Natural Science Foundation of China/ ; 31972086//National Natural Science Foundation of China/ ; 2021YFD2100700//Key Technologies Research and Development Program/ ; }, mesh = {Humans ; *Lacticaseibacillus paracasei ; Metagenomics ; Lacticaseibacillus ; Prophages/genetics ; Drug Resistance, Microbial ; Carbohydrates ; }, abstract = {Lacticaseibacillus paracasei has beneficial effects on human health and holds promising potential as a probiotic for use in the development of functional foods, especially dairy products. This species can adapt to a variety of ecological niches and presents fundamental carbohydrate metabolism and tolerance to environmental stresses. However, the population structure, ecology, and antibiotic resistance of Lc. paracasei in diverse ecological niches are poorly understood. Reclassification of Lc. paracasei as a separate species of Lacticaseibacillus has stimulated renewed interest in its research, and a deeper interpretation of it will be important for screening strains beneficial to human health. Here, we collected 121 self-isolated and 268 publicly available Lc. 
paracasei genomes and discussed how genomic approaches have advanced our understanding of its taxonomy, ecology, evolution, diversity, integrated prophage-related element distribution, antibiotic resistance, and carbohydrate utilization. Moreover, for the Lc. paracasei strains isolated in this study, we assessed the inducibility of integrated prophages in their genomes and determined the phenotypes that presented tolerance to multiple antibiotics to provide evidence for safety evaluations of Lc. paracasei during the fermentation processes.}, } @article {pmid37580659, year = {2023}, author = {Ma, C and Li, M and Peng, H and Lan, M and Tao, L and Li, C and Wu, C and Bai, H and Zhong, Y and Zhong, S and Qin, R and Li, F and Li, J and He, J}, title = {Mesomycoplasma ovipneumoniae from goats with respiratory infection: pathogenic characteristics, population structure, and genomic features.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {220}, pmid = {37580659}, issn = {1471-2180}, mesh = {Animals ; Sheep ; Goats ; *Mycoplasma ovipneumoniae/genetics ; Phylogeny ; Genome-Wide Association Study ; *Respiratory Tract Infections/veterinary ; Genomics ; *Pneumonia, Mycoplasma/pathology/veterinary ; *Sheep Diseases ; }, abstract = {BACKGROUND: Mycoplasma ovipneumoniae is a critical pathogen that causes respiratory diseases that threaten Caprini health and cause economic damage. A genome-wide study of M. ovipneumoniae will help understand the pathogenic characteristics of this microorganism.

RESULTS: Toxicological pathology and whole-genome sequencing of nine M. ovipneumoniae strains isolated from goats were performed using an epidemiological survey. These strains exhibited anterior ventral lung consolidation, typical of bronchopneumonia in goats. Average nucleotide identity and phylogenetic analysis based on whole-genome sequences showed that all M. ovipneumoniae strains clustered into two clades, largely in accordance with their geographical origins. The pan-genome of the 23 M. ovipneumoniae strains contained 5,596 genes, including 385 core, 210 soft core, and 5,001 accessory genes. Among these genes, two protein-coding genes were annotated as cilium adhesion and eight as paralog surface adhesins when annotated to VFDB, and no antibiotic resistance-related genes were predicted. Additionally, 23 strains carried glucosidase-related genes (ycjT and group_1595) and glucosidase-related genes (atpD_2), indicating that M. ovipneumoniae possesses a wide range of glycoside hydrolase activities.

CONCLUSIONS: The population structure and genomic features identified in this study will facilitate further investigations into the pathogenesis of M. ovipneumoniae and lay the foundation for the development of preventive and therapeutic methods.}, } @article {pmid37580306, year = {2023}, author = {Alexandrov, N and Wang, T and Blair, L and Nadon, B and Sayer, D}, title = {HLA-OLI: A new MHC class I pseudogene and HLA-Y are located on a 60 kb indel in the human MHC between HLA-W and HLA-J.}, journal = {HLA}, volume = {}, number = {}, pages = {}, doi = {10.1111/tan.15180}, pmid = {37580306}, issn = {2059-2310}, abstract = {Analysis of publicly available whole-genome sequence data from the Human Pangenome Project and the 1000 Genomes Project has identified a DNA segment of approximately 60 kb in the major histocompatibility complex (MHC) between HLA-W and HLA-J that is present in some MHC haplotypes but not others. This DNA segment is largely repeat element-rich but includes the pseudogene HLA-Y, thus pinpointing the location of this pseudogene, and a new HLA class I sequence we have called HLA-OLI. HLA-OLI clusters phylogenetically with the HLA class I pseudogenes, HLA-P and HLA-W, and appears to have a similar genetic structure. 
The availability of whole-genome sequence data from diverse populations enables a detailed characterization of the MHC at the population level and will have implications for understanding MHC disease associations and the non-HLA MHC factors that impact unrelated hematopoietic cell transplant outcomes.}, } @article {pmid37578072, year = {2023}, author = {Khan, K and Burki, S and Alsaiari, AA and Alhuthali, HM and Alharthi, NS and Jalal, K}, title = {A therapeutic epitopes-based vaccine engineering against Salmonella enterica XDR strains for typhoid fever: a Pan-vaccinomics approach.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-15}, doi = {10.1080/07391102.2023.2246587}, pmid = {37578072}, issn = {1538-0254}, abstract = {A prevalent food-borne pathogen, Salmonella enterica serotypes Typhi, is responsible for gastrointestinal and systemic infections globally. Salmonella vaccines are the most effective, however, producing a broad-spectrum vaccine remains challenging due to Salmonella's many serotypes. Efforts are urgently required to develop a novel vaccine candidate that can tackle all S. Typhi strains because of their high resistance to multiple kinds of antibiotics (particularly the XDR H58 strain). In this work, we used a computational pangenome-based vaccine design technique on all available (n = 119) S. Typhi reference genomes and identified one TonB-dependent siderophore receptor (WP_001034967.1) as highly conserved and prospective vaccine candidates from the predicted core genome (n = 3,351). The applied pan-proteomics and Immunoinformatic approaches help in the identification of four epitopes that may trigger adequate host body immune responses. Furthermore, the proposed vaccine ensemble demonstrates a stable binding conformation with the examined immunological receptor (HLAs and TLR2/4) and has large interaction energy determined via molecular docking and molecular dynamics simulation techniques. 
Eventually, an expression vector for the Escherichia coli K12 strain was constructed from the vaccine sequence. Additional analysis revealed that the vaccine may help to elicit strong immune responses for typhoid infections, however, experimental analysis is required to verify the vaccine's effectiveness based on these results. Moreover, the applied computer-assisted vaccine design may considerably decrease vaccine development costs and speed up the process. The study's findings are intriguing, but they must be evaluated in the experimental labs to confirm the developed vaccine's biological efficiency against XDR S. Typhi. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37577683, year = {2023}, author = {Yocca, AE and Platts, A and Alger, E and Teresi, S and Mengist, MF and Benevenuto, J and Felipe V Ferrão, L and Jacobs, M and Babinski, M and Magallanes-Lundback, M and Bayer, P and Golicz, A and Humann, JL and Main, D and Espley, RV and Chagné, D and Albert, NW and Montanari, S and Vorsa, N and Polashock, J and Díaz-Garcia, L and Zalapa, J and Bassil, NV and Munoz, PR and Iorizzo, M and Edger, PP}, title = {Blueberry and cranberry pangenomes as a resource for future genetic studies and breeding efforts.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37577683}, support = {T32 GM110523/GM/NIGMS NIH HHS/United States ; }, abstract = {Domestication of cranberry and blueberry began in the United States in the early 1800s and 1900s, respectively, and in part owing to their flavors and health-promoting benefits are now cultivated and consumed worldwide. The industry continues to face a wide variety of production challenges (e.g. disease pressures) as well as a demand for higher-yielding cultivars with improved fruit quality characteristics. Unfortunately, molecular tools to help guide breeding efforts for these species have been relatively limited compared with those for other high-value crops. 
Here, we describe the construction and analysis of the first pangenome for both blueberry and cranberry. Our analysis of these pangenomes revealed both crops exhibit great genetic diversity, including the presence-absence variation of 48.4% genes in highbush blueberry and 47.0% genes in cranberry. Auxiliary genes, those not shared by all cultivars, are significantly enriched with molecular functions associated with disease resistance and the biosynthesis of specialized metabolites, including compounds previously associated with improving fruit quality traits. The discovery of thousands of genes, not present in the previous reference genomes for blueberry and cranberry, will serve as the basis of future research and as potential targets for future breeding efforts. The pangenome, as a multiple-sequence alignment, as well as individual annotated genomes, are publicly available for analysis on the Genome Database for Vaccinium - a curated and integrated web-based relational database. Lastly, the core-gene predictions from the pangenomes will serve useful to develop a community genotyping platform to guide future molecular breeding efforts across the family.}, } @article {pmid37576785, year = {2023}, author = {Qiu, J and Shi, Y and Zhao, F and Xu, Y and Xu, H and Dai, Y and Cao, Y}, title = {The Pan-Genomic Analysis of Corynebacterium striatum Revealed its Genetic Characteristics as an Emerging Multidrug-Resistant Pathogen.}, journal = {Evolutionary bioinformatics online}, volume = {19}, number = {}, pages = {11769343231191481}, pmid = {37576785}, issn = {1176-9343}, abstract = {Corynebacterium striatum is a Gram-positive bacterium that is straight or slightly curved and non-spore-forming. Although it was originally believed to be a part of the normal microbiome of human skin, a growing number of studies have identified it as a cause of various chronic diseases, bacteremia, and respiratory infections. 
However, despite its increasing importance as a pathogen, the genetic characteristics of the pathogen population, such as genomic characteristics and differences, the types of resistance genes and virulence factors carried by the pathogen and their distribution in the population are poorly understood. To address these knowledge gaps, we conducted a pan-genomic analysis of 314 strains of C. striatum isolated from various tissues and geographic locations. Our analysis revealed that C. striatum has an open pan-genome, comprising 5692 gene families, including 1845 core gene families, 2362 accessory gene families, and 1485 unique gene families. We also found that C. striatum exhibits a high degree of diversity across different sources, but strains isolated from skin tissue are more conserved. Furthermore, we identified 53 drug resistance genes and 42 virulence factors by comparing the strains to the drug resistance gene database (CARD) and the pathogen virulence factor database (VFDB), respectively. We found that these genes and factors are widely distributed among C. striatum, with 77.7% of strains carrying 2 or more resistance genes and displaying primary resistance to aminoglycosides, tetracyclines, lincomycin, macrolides, and streptomycin. The virulence factors are primarily associated with pathogen survival within the host, iron uptake, pili, and early biofilm formation. In summary, our study provides insights into the population diversity, resistance genes, and virulence factors of C. striatum from different sources. Our findings could inform future research and clinical practices in the diagnosis, prevention, and treatment of C. 
striatum-associated diseases.}, } @article {pmid37576287, year = {2023}, author = {Liu, L and Yu, W and Cai, K and Ma, S and Wang, Y and Ma, Y and Zhao, H}, title = {Identification of vaccine candidates against rhodococcus equi by combining pangenome analysis with a reverse vaccinology approach.}, journal = {Heliyon}, volume = {9}, number = {8}, pages = {e18623}, pmid = {37576287}, issn = {2405-8440}, abstract = {Rhodococcus equi (R. equi) is a zoonotic opportunistic pathogen that can cause life-threatening infections. The rapid evolution of multidrug-resistant R. equi and the fact that there is no currently licensed effective vaccine against R. equi warrant the need for vaccine development. Reverse vaccinology (RV), which involves screening a pathogen's entire genome and proteome using various web-based prediction tools, is considered one of the most effective approaches for identifying vaccine candidates. Here, we performed a pangenome analysis to determine the core proteins of R. equi. We then used the RV approach to examine the subcellular localization, host and gut flora homology, antigenicity, transmembrane helices, physicochemical properties, and immunogenicity of the core proteins to select potential vaccine candidates. The vaccine candidates were then subjected to epitope mapping to predict the exposed antigenic epitopes that possess the ability to bind with major histocompatibility complex I/II (MHC I/II) molecules. These vaccine candidates and epitopes will form a library of elements for the development of a polyvalent or universal vaccine against R. equi. Sixteen R. equi complete proteomes were found to contain 6,238 protein families, and the core proteins consisted of 3,969 protein families (∼63.63% of the pangenome), reflecting a low degree of intraspecies genomic variability. 
From the pool of core proteins, 483 nonhost homologous membrane and extracellular proteins were screened, and 12 vaccine candidates were finally identified according to their antigenicity, physicochemical properties and other factors. These included four cell wall/membrane/envelope biogenesis proteins; four amino acid transport and metabolism proteins; one cell cycle control, cell division and chromosome partitioning protein; one carbohydrate transport and metabolism protein; one secondary metabolite biosynthesis, transport and catabolism protein; and one defense mechanism protein. All 12 vaccine candidates have an experimentally validated 3D structure available in the protein data bank (PDB). Epitope mapping of the candidates showed that 16 MHC I epitopes and 13 MHC II epitopes with the strongest immunogenicity were exposed on the protein surface, indicating that they could be used to develop a polypeptide vaccine. Thus, we utilized an analytical strategy that combines pangenome analysis and RV to generate a peptide antigen library that simplifies the development of multivalent or universal vaccines against R. equi and can be applied to the development of other vaccines.}, } @article {pmid37575187, year = {2023}, author = {Chao, KH and Chen, PW and Seshia, SA and Langmead, B}, title = {WGT: Tools and algorithms for recognizing, visualizing, and generating Wheeler graphs.}, journal = {iScience}, volume = {26}, number = {8}, pages = {107402}, pmid = {37575187}, issn = {2589-0042}, abstract = {A Wheeler graph represents a collection of strings in a way that is particularly easy to index and query. Such a graph is a practical choice for representing a graph-shaped pangenome, and it is the foundation for current graph-based pangenome indexes. However, there are no practical tools to visualize or to check graphs that may have the Wheeler properties. 
Here, we present Wheelie, an algorithm that combines a renaming heuristic with a permutation solver (Wheelie-PR) or a Satisfiability Modulo Theory (SMT) solver (Wheelie-SMT) to check whether a given graph has the Wheeler properties, a problem that is NP-complete in general. Wheelie can check a variety of random and real-world graphs in far less time than any algorithm proposed to date. It can check a graph with 1,000s of nodes in seconds. We implement these algorithms together with complementary visualization tools in the WGT toolkit, available as open source software at https://github.com/Kuanhao-Chao/Wheeler_Graph_Toolkit.}, } @article {pmid37573136, year = {2023}, author = {Kokate, PP and Bales, E and Joyner, D and Hazen, TC and Techtmann, SM}, title = {Biogeographic patterns in populations of marine Pseudoalteromonas atlantica isolates.}, journal = {FEMS microbiology letters}, volume = {370}, number = {}, pages = {}, doi = {10.1093/femsle/fnad081}, pmid = {37573136}, issn = {1574-6968}, mesh = {Phylogeny ; *Pseudoalteromonas ; Biodiversity ; }, abstract = {Intra-specific genomic diversity is well documented in microbes. The question, however, remains whether natural selection or neutral evolution is the major contributor to this diversity. We undertook this study to estimate genomic diversity in Pseudoalteromonas atlantica populations and whether the diversity, if present, could be attributed to environmental factors or distance effects. We isolated and sequenced twenty-three strains of P. atlantica from three geographically distant deep marine basins and performed comparative genomic analyses to study the genomic diversity of populations among these basins. Average nucleotide identity followed a strictly geographical pattern. In two out of three locations, the strains within the location exhibited >99.5% identity, whereas, among locations, the strains showed <98.11% identity. 
Phylogenetic and pan-genome analysis also reflected the biogeographical separation of the strains. Strains from the same location shared many accessory genes and clustered closely on the phylogenetic tree. Phenotypic diversity between populations was studied in ten out of twenty-three strains testing carbon and nitrogen source utilization and osmotolerance. A genetic basis for phenotypic diversity could be established in most cases but was apparently not influenced by local environmental conditions. Our study suggests that neutral evolution may have a substantial role in the biodiversity of P. atlantica.}, } @article {pmid37571822, year = {2023}, author = {Raza, A and Bohra, A and Garg, V and Varshney, RK}, title = {Back to wild relatives for future breeding through super-pangenome.}, journal = {Molecular plant}, volume = {16}, number = {9}, pages = {1363-1365}, doi = {10.1016/j.molp.2023.08.005}, pmid = {37571822}, issn = {1752-9867}, mesh = {*Genomics ; *Plant Breeding ; }, } @article {pmid37567624, year = {2023}, author = {Rajput, A and Chauhan, SM and Mohite, OS and Hyun, JC and Ardalani, O and Jahn, LJ and Sommer, MO and Palsson, BO}, title = {Pangenome analysis reveals the genetic basis for taxonomic classification of the Lactobacillaceae family.}, journal = {Food microbiology}, volume = {115}, number = {}, pages = {104334}, doi = {10.1016/j.fm.2023.104334}, pmid = {37567624}, issn = {1095-9998}, mesh = {*Lactobacillaceae ; *Genomics ; Phylogeny ; }, abstract = {Lactobacillaceae represent a large family of important microbes that are foundational to the food industry. Many genome sequences of Lactobacillaceae strains are now available, enabling us to conduct a comprehensive pangenome analysis of this family. 
We collected 3591 high-quality genomes from public sources and found that: 1) they contained enough genomes for 26 species to perform a pangenomic analysis, 2) the normalized Heap's coefficient λ (a measure of pangenome openness) was found to have an average value of 0.27 (ranging from 0.07 to 0.37), 3) the pangenome openness was correlated with the abundance and genomic location of transposons and mobilomes, 4) the pangenome for each species was divided into core, accessory, and rare genomes, that highlight the species-specific properties (such as motility and restriction-modification systems), 5) the pangenome of Lactiplantibacillus plantarum (which contained the highest number of genomes found amongst the 26 species studied) contained nine distinct phylogroups, and 6) genome mining revealed a richness of detected biosynthetic gene clusters, with functions ranging from antimicrobial and probiotic to food preservation, but ∼93% were of unknown function. This study provides the first in-depth comparative pangenomics analysis of the Lactobacillaceae family.}, } @article {pmid37556679, year = {2023}, author = {Hill, H and Mitsi, E and Nikolaou, E and Blizard, A and Pojar, S and Howard, A and Hyder-Wright, A and Devin, J and Reiné, J and Robinson, R and Solórzano, C and Jochems, SP and Kenny-Nyazika, T and Ramos-Sevillano, E and Weight, CM and Myerscough, C and McLenaghan, D and Morton, B and Gibbons, E and Farrar, M and Randles, V and Burhan, H and Chen, T and Shandling, AD and Campo, JJ and Heyderman, RS and Gordon, SB and Brown, JS and Collins, AM and Ferreira, DM}, title = {A Randomised Controlled Trial of Nasal Immunisation with Live Virulence Attenuated Streptococcus pneumoniae Strains Using Human Infection Challenge.}, journal = {American journal of respiratory and critical care medicine}, volume = {}, number = {}, pages = {}, doi = {10.1164/rccm.202302-0222OC}, pmid = {37556679}, issn = {1535-4970}, abstract = {RATIONALE: Pneumococcal pneumonia remains a 
global health problem. Pneumococcal colonisation increases local and systemic protective immunity, suggesting nasal administration of live attenuated S. pneumoniae strains could help prevent infections.

OBJECTIVES: We used a controlled human infection model to investigate whether nasopharyngeal colonisation with attenuated S. pneumoniae strains protected against re-colonisation with wild-type (WT) S. pneumoniae (Spn).

METHODS: Healthy adults aged 18-50 years were randomised (1:1:1:1) for nasal administration twice (two weeks interval) with saline, WT Spn6B (BHN418) or one of two genetically modified Spn6B strains - SpnA1 (∆fhs/piaA) or SpnA3 (∆proABC/piaA) (Stage I). After 6 months, participants were challenged with SpnWT to assess protection against the homologous serotype (Stage II).

MEASUREMENTS AND MAIN RESULTS: 125 participants completed both study stages as per intention to treat. No Serious Adverse Events were reported. In Stage I, colonisation rates were similar amongst groups: SpnWT 58.1% (18/31), SpnA1 60% (18/30) and SpnA3 59.4% (19/32). Anti-Spn nasal IgG levels post-colonisation were similar in all groups whilst serum IgG responses were higher in the SpnWT and SpnA1 groups than the SpnA3 group. In colonised individuals, increases in IgG responses were identified against 197 Spn protein antigens and serotype 6 capsular polysaccharide using a pangenome array. Participants given SpnWT or SpnA1 in Stage I were partially protected against homologous challenge with SpnWT (29% and 30% recolonisation rates, respectively) at Stage II, whereas those exposed to SpnA3 achieved a recolonisation rate similar to the control group (50% vs 47%, respectively).

CONCLUSION: Nasal colonisation with genetically modified live attenuated Spn was safe and induced protection against recolonisation, suggesting nasal administration of live attenuated Spn could be an effective strategy for preventing pneumococcal infections.}, } @article {pmid37555725, year = {2023}, author = {Wei, F and Liang, X and Shi, JC and Luo, J and Qiu, LJ and Li, XX and Lu, LJ and Wen, Y and Feng, J}, title = {Pan-genomic Analysis Identifies the Chinese Strain as a New Subspecies of Xanthomonas fragariae.}, journal = {Plant disease}, volume = {}, number = {}, pages = {}, doi = {10.1094/PDIS-05-23-0933-SC}, pmid = {37555725}, issn = {0191-2917}, abstract = {Xanthomonas fragariae (X. fragariae) is classified as a quarantine pathogen by the European and Mediterranean Plant Protection Organization. It commonly induces typical angular leaf spot (ALS) symptoms in strawberry leaves. X. fragariae strains from China (YL19, SHAQP01, and YLX21) exhibit ALS symptoms in leaves and more severe symptoms of dry cavity rot in strawberry crowns. Conversely, strains from other countries do not cause severe dry cavity rot symptoms in strawberries. Employing multilocus sequence analysis (MLSA), average nucleotide identity (ANI), and amino acid identity (AAI), we determined that Chinese strains of X. fragariae are genetically distinct from other strains and can be considered a new subspecies. Subsequent analysis of 63 X. fragariae genomes published at NCBI using IPGA and EDGAR3.0 revealed the pan-genomic profile, with 1680 shared genes present in all 63 strains, including 71 virulence-related genes. Additionally, we identified 123 genes exclusive to all of the Chinese strains, encompassing 12 virulence-related genes. 
The qRT-PCR analysis demonstrated that the expression of XopD, XopG1, CE8, GT2 and GH121, out of 12 virulence-related genes of Chinese strains (YL19) exhibited a constant increase in the early stages (6 hpi, 24 hpi, 54 hpi, and 96 hpi) of strawberry leaf infected by YL19. Hence, the presence of XopD, XopG1, CE8, GT2, and GH121 in Chinese strains may play important roles in the early infection process of Chinese strains. These findings offer novel insights into comprehending the population structure and variation in the pathogenic capacity of X. fragariae.}, } @article {pmid37553643, year = {2023}, author = {Hyun, JC and Palsson, BO}, title = {Reconstruction of the last bacterial common ancestor from 183 pangenomes reveals a versatile ancient core genome.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {183}, pmid = {37553643}, issn = {1474-760X}, support = {U01 AI124316/AI/NIAID NIH HHS/United States ; }, mesh = {Phylogeny ; *Evolution, Molecular ; *Genome ; Gene Frequency ; Bacteria/genetics ; Genome, Bacterial ; }, abstract = {BACKGROUND: Cumulative sequencing efforts have yielded enough genomes to construct pangenomes for dozens of bacterial species and elucidate intraspecies gene conservation. Given the diversity of organisms for which this is achievable, similar analyses for ancestral species are feasible through the integration of pangenomics and phylogenetics, promising deeper insights into the nature of ancient life.

RESULTS: We construct pangenomes for 183 bacterial species from 54,085 genomes and identify their core genomes using a novel statistical model to estimate genome-specific error rates and underlying gene frequencies. The core genomes are then integrated into a phylogenetic tree to reconstruct the core genome of the last bacterial common ancestor (LBCA), yielding three main results: First, the gene content of modern and ancestral core genomes are diverse at the level of individual genes but are similarly distributed by functional category and share several poorly characterized genes. Second, the LBCA core genome is distinct from any individual modern core genome but has many fundamental biological systems intact, especially those involving translation machinery and biosynthetic pathways to all major nucleotides and amino acids. Third, despite this metabolic versatility, the LBCA core genome likely requires additional non-core genes for viability, based on comparisons with the minimal organism, JCVI-Syn3A.

CONCLUSIONS: These results suggest that many cellular systems commonly conserved in modern bacteria were not just present in ancient bacteria but were nearly immutable with respect to short-term intraspecies variation. Extending this analysis to other domains of life will likely provide similar insights into more distant ancestral species.}, } @article {pmid37546276, year = {2023}, author = {Gao, Z and Bian, J and Lu, F and Jiao, Y and He, H}, title = {Triticeae crop genome biology: an endless frontier.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1222681}, pmid = {37546276}, issn = {1664-462X}, abstract = {Triticeae, the wheatgrass tribe, includes several major cereal crops and their wild relatives. Major crops within the Triticeae are wheat, barley, rye, and oat, which are important for human consumption, animal feed, and rangeland protection. Species within this tribe are known for their large genomes and complex genetic histories. Powered by recent advances in sequencing technology, researchers worldwide have made progress in elucidating the genomes of Triticeae crops. In addition to assemblies of high-quality reference genomes, pan-genome studies have just started to capture the genomic diversities of these species, shedding light on our understanding of the genetic basis of domestication and environmental adaptation of Triticeae crops. In this review, we focus on recent signs of progress in genome sequencing, pan-genome analyses, and resequencing analysis of Triticeae crops. 
We also propose future research avenues in Triticeae crop genomes, including identifying genome structure variations, the association of genomic regions with desired traits, mining functions of the non-coding area, introgression of high-quality genes from wild Triticeae resources, genome editing, and integration of genomic resources.}, } @article {pmid37542576, year = {2023}, author = {Hong, H and Yang, SM and Kim, E and Kim, HJ and Park, SH}, title = {Comprehensive metagenomic analysis of stress-resistant and -sensitive Listeria monocytogenes.}, journal = {Applied microbiology and biotechnology}, volume = {107}, number = {19}, pages = {6047-6056}, pmid = {37542576}, issn = {1432-0614}, support = {E0210702-01//Korea Food Research Institute/ ; }, mesh = {*Listeria monocytogenes/genetics ; Food Microbiology ; Virulence/genetics ; Virulence Factors/genetics ; Whole Genome Sequencing ; }, abstract = {Listeria monocytogenes is a pathogenic bacterium which can live in adverse environments (low pH, high salinity, and low temperature). Even though there are various whole genome sequencing (WGS) data on L. monocytogenes, investigations on genetic differences between stress-resistant and -sensitive L. monocytogenes grown under stress environments have been not fully examined. This study aims to investigate and compare genetic characteristics between stress-resistant and -sensitive L. monocytogenes using whole genome sequencing (WGS). A total of 47 L. monocytogenes strains (43 stress-resistant and 4 stress-sensitive) were selected based on the stress-resistance tests under pH 3, 5% salt concentration, and 1 °C. The sequencing library for WGS was prepared and sequenced using an Illumina MiSeq. Genetic characteristics of two different L. monocytogenes groups were examined to analyze the pangenome, functionality, virulence, antibiotic resistance, core, and unique genes. The functionality of unique genes in the stress-resistant L. 
monocytogenes was distinct compared to the stress-sensitive L. monocytogenes, such as carbohydrate and nucleotide transport and metabolism. The lisR virulence gene was detected more in the stress-resistant L. monocytogenes than in the stress-sensitive group. Five stress-resistant L. monocytogenes strains possessed tet(M) antibiotic resistance gene. This is the first study suggesting that deep genomic characteristics of L. monocytogenes may have different resistance level under stress conditions. This new insight will aid in understanding the genetic relationship between stress-resistant and -sensitive L. monocytogenes strains isolated from diverse resources. KEY POINTS: • Whole genomes of L. monocytogenes isolated from three different sources were analyzed. • Differences in two L. monocytogenes groups were identified in functionality, virulence, and antibiotic resistance genes. • This study first examines the association between resistances and whole genomes of stress-resistant and -sensitive L. monocytogenes.}, } @article {pmid37538845, year = {2023}, author = {Morales-Olavarría, M and Nuñez-Belmar, J and González, D and Vicencio, E and Rivas-Pardo, JA and Cortez, C and Cárdenas, JP}, title = {Phylogenomic analysis of the Porphyromonas gingivalis - Porphyromonas gulae duo: approaches to the origin of periodontitis.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1226166}, pmid = {37538845}, issn = {1664-302X}, abstract = {Porphyromonas gingivalis is an oral human pathogen associated with the onset and progression of periodontitis, a chronic immune-inflammatory disease characterized by the destruction of the teeth-supporting tissue. P. 
gingivalis belongs to the genus Porphyromonas, which is characterized by being composed of Gram-negative, asaccharolytic, non-spore-forming, non-motile, obligatory anaerobic species, inhabiting niches such as the oral cavity, urogenital tract, gastrointestinal tract and infected wound from different mammals including humans. Among the Porphyromonas genus, P. gingivalis stands out for its specificity in colonizing the human oral cavity and its keystone pathogen role in periodontitis pathogenesis. To understand the evolutionary process behind P. gingivalis in the context of the Porphyromonas genus, in this study, we performed a comparative genomics study with publicly available Porphyromonas genomes, focused on four main objectives: (A) to confirm the phylogenetic position of P. gingivalis in the Porphyromonas genus by phylogenomic analysis; (B) the definition and comparison of the pangenomes of P. gingivalis and its relative P. gulae; (C) the evaluation of the gene family gain/loss events during the divergence of P. gingivalis and P. gulae; and (D) the evaluation of the evolutionary pressure (represented by the calculation of Tajima-D values and dN/dS ratios) comparing gene families of P. gingivalis and P. gulae. Our analysis found 84 high-quality assemblies representing P. gingivalis and 14 P. gulae strains (from a total of 233 Porphyromonas genomes). Phylogenomic analysis confirmed that P. gingivalis and P. gulae are highly related lineages, close to P. loveana. Both organisms harbored open pangenomes, with a strong core-to-accessory ratio for housekeeping genes and a negative ratio for unknown function genes. Our analyses also characterized the gene set differentiating P. gulae from P. gingivalis, mainly associated with unknown functions. Relevant virulence factors, such as the FimA, Mfa1, and the hemagglutinins, are conserved in P. gulae, P. gingivalis, and P. loveana, suggesting that the origin of those factors occurred previous to the P. gulae - P. 
gingivalis divergence. These results suggest an unexpected evolutionary relationship between the P. gulae - P. gingivalis duo and P. loveana, showing more clues about the origin of the role of those organisms in periodontitis.}, } @article {pmid37537691, year = {2023}, author = {Wu, D and Xie, L and Sun, Y and Huang, Y and Jia, L and Dong, C and Shen, E and Ye, CY and Qian, Q and Fan, L}, title = {A syntelog-based pan-genome provides insights into rice domestication and de-domestication.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {179}, pmid = {37537691}, issn = {1474-760X}, mesh = {*Oryza/genetics ; Domestication ; Genome, Plant ; Genes, Plant ; Genomics ; Evolution, Molecular ; }, abstract = {BACKGROUND: Asian rice is one of the world's most widely cultivated crops. Large-scale resequencing analyses have been undertaken to explore the domestication and de-domestication genomic history of Asian rice, but the evolution of rice is still under debate.

RESULTS: Here, we construct a syntelog-based rice pan-genome by integrating and merging 74 high-accuracy genomes based on long-read sequencing, encompassing all ecotypes and taxa of Oryza sativa and Oryza rufipogon. Analyses of syntelog groups illustrate subspecies divergence in gene presence-and-absence and haplotype composition and identify massive genomic regions putatively introgressed from ancient Geng/japonica to ancient Xian/indica or its wild ancestor, including almost all well-known domestication genes and a 4.5-Mbp centromere-spanning block, supporting a single domestication event in main rice subspecies. Genomic comparisons between weedy and cultivated rice highlight the contribution from wild introgression to the emergence of de-domestication syndromes in weedy rice.

CONCLUSIONS: This work highlights the significance of inter-taxa introgression in shaping diversification and divergence in rice evolution and provides an exploratory attempt by utilizing the advantages of pan-genomes in evolutionary studies.}, } @article {pmid37531401, year = {2023}, author = {Burgaya, J and Marin, J and Royer, G and Condamine, B and Gachet, B and Clermont, O and Jaureguy, F and Burdet, C and Lefort, A and de Lastours, V and Denamur, E and Galardini, M and Blanquart, F and others}, title = {The bacterial genetic determinants of Escherichia coli capacity to cause bloodstream infections in humans.}, journal = {PLoS genetics}, volume = {19}, number = {8}, pages = {e1010842}, pmid = {37531401}, issn = {1553-7404}, mesh = {Humans ; Escherichia coli ; *Escherichia coli Infections/genetics/microbiology ; Genes, Bacterial ; Virulence/genetics ; *Sepsis/genetics ; Phylogeny ; }, abstract = {Escherichia coli is both a highly prevalent commensal and a major opportunistic pathogen causing bloodstream infections (BSI). A systematic analysis characterizing the genomic determinants of extra-intestinal pathogenic vs. commensal isolates in human populations, which could inform mechanisms of pathogenesis, diagnostic, prevention and treatment is still lacking. We used a collection of 912 BSI and 370 commensal E. coli isolates collected in France over a 17-year period (2000-2017). We compared their pangenomes, genetic backgrounds (phylogroups, STs, O groups), presence of virulence-associated genes (VAGs) and antimicrobial resistance genes, finding significant differences in all comparisons between commensal and BSI isolates. A machine learning linear model trained on all the genetic variants derived from the pangenome and controlling for population structure reveals similar differences in VAGs, discovers new variants associated with pathogenicity (capacity to cause BSI), and accurately classifies BSI vs. commensal strains. 
Pathogenicity is a highly heritable trait, with up to 69% of the variance explained by bacterial genetic variants. Lastly, complementing our commensal collection with an older collection from 1980, we predict that pathogenicity continuously increased through 1980, 2000, to 2010. Together our findings imply that E. coli exhibit substantial genetic variation contributing to the transition between commensalism and pathogenicity and that this species evolved towards higher pathogenicity.}, } @article {pmid37530223, year = {2023}, author = {Sun, M and Yan, H and Zhang, A and Jin, Y and Lin, C and Luo, L and Wu, B and Fan, Y and Tian, S and Cao, X and Wang, Z and Luo, J and Yang, Y and Jia, J and Zhou, P and Tang, Q and Jones, CS and Varshney, RK and Srivastava, RK and He, M and Xie, Z and Wang, X and Feng, G and Nie, G and Huang, D and Zhang, X and Zhu, F and Huang, L}, title = {Milletdb: a multi-omics database to accelerate the research of functional genomics and molecular breeding of millets.}, journal = {Plant biotechnology journal}, volume = {}, number = {}, pages = {}, doi = {10.1111/pbi.14136}, pmid = {37530223}, issn = {1467-7652}, support = {CARS-34//CARS/ ; SCCXTD-2021-16//Modern Agricultural Industry System Sichuan Forage Innovation Team/ ; 31771866//National Natural Science Foundation of China/ ; 32071867//National Natural Science Foundation of China/ ; 2021YFYZ0013//Sichuan Province Research Grant/ ; }, abstract = {Millets are a class of nutrient-rich coarse cereals with high resistance to abiotic stress; thus, they guarantee food security for people living in areas with extreme climatic conditions and provide stress-related genetic resources for other crops. However, no platform is available to provide a comprehensive and systematic multi-omics analysis for millets, which seriously hinders the mining of stress-related genes and the molecular breeding of millets. 
Here, a free, web-accessible, user-friendly millets multi-omics database platform (Milletdb, http://milletdb.novogene.com) has been developed. The Milletdb contains six millets and their one related species genomes, graph-based pan-genomics of pearl millet, and stress-related multi-omics data, which enable Milletdb to be the most complete millets multi-omics database available. We stored GWAS (genome-wide association study) results of 20 yield-related trait data obtained under three environmental conditions [field (no stress), early drought and late drought] for 2 years in the database, allowing users to identify stress-related genes that support yield improvement. Milletdb can simplify the functional genomics analysis of millets by providing users with 20 different tools (e.g., 'Gene mapping', 'Co-expression', 'KEGG/GO Enrichment' analysis, etc.). On the Milletdb platform, a gene PMA1G03779.1 was identified through 'GWAS', which has the potential to modulate yield and respond to different environmental stresses. Using the tools provided by Milletdb, we found that the stress-related PLATZs TFs (transcription factors) family expands in 87.5% of millet accessions and contributes to vegetative growth and abiotic stress responses. Milletdb can effectively serve researchers in the mining of key genes, genome editing and molecular breeding of millets.}, } @article {pmid37529582, year = {2023}, author = {Liang, J and Duan, R and Qin, S and Lv, D and He, Z and Zhang, H and Duan, Q and Xi, J and Chun, H and Fu, G and Zheng, X and Tang, D and Wu, W and Han, H and Jing, H and Wang, X}, title = {The complex genomic diversity of Yersinia pestis on the long-term plague foci in Qinghai-Tibet plateau.}, journal = {Ecology and evolution}, volume = {13}, number = {8}, pages = {e10387}, pmid = {37529582}, issn = {2045-7758}, abstract = {Plague is a typical natural focus disease that circulates in different ecology of vectors and reservoir hosts. 
We conducted genomic population and phylogenetic analyses of the Yersinia pestis collected from the 12 natural plague foci in China with more than 20 kinds of hosts and vectors. Different ecological landscapes with specific hosts, vectors, and habitat which shape various niches for Y. pestis. The phylogeographic diversity of Y. pestis in different kinds plague foci in China showed host niches adaptation. Most natural plague foci strains are region-and focus-specific, with one predominant subpopulation; but the isolates from the Qinghai-Tibet plateau harbor a higher genetic diversity than other foci. The Y. pestis from Marmota himalayana plague foci are defined as the ancestors of different populations at the root of the evolutionary tree, suggesting several different evolutionary paths to other foci. It has the largest pan-genome and widest SNP distances with most accessory genes enriched in mobilome functions (prophages, transposons). Geological barriers play an important role in the maintenance of local Y. pestis species and block the introduction of non-native strains. This study provides new insights into the control of plague outbreaks and epidemics, deepened the understanding of the evolutionary history of MHPF (M. himalayana plague focus) in China. The population structure and identify clades among different natural foci of China renewed the space cognition of the plague.}, } @article {pmid37526693, year = {2023}, author = {Campillo-Balderas, JA and Lazcano, A and Cottom-Salas, W and Jácome, R and Becerra, A}, title = {Pangenomic Analysis of Nucleo-Cytoplasmic Large DNA Viruses. 
I: The Phylogenetic Distribution of Conserved Oxygen-Dependent Enzymes Reveals a Capture-Gene Process.}, journal = {Journal of molecular evolution}, volume = {}, number = {}, pages = {}, pmid = {37526693}, issn = {1432-1432}, support = {IN214421//DGAPA-PAPIIT, UNAM/ ; }, abstract = {The Nucleo-Cytoplasmic Large DNA Viruses (NCLDVs) infect a wide range of eukaryotic species, including amoeba, algae, fish, amphibia, arthropods, birds, and mammals. This group of viruses has linear or circular double-stranded DNA genomes whose size spans approximately one order of magnitude, from 100 to 2500 kbp. The ultimate origin of this peculiar group of viruses remains an open issue. Some have argued that NCLDVs' origin may lie in a bacteriophage ancestor that increased its genome size by subsequent recruitment of eukaryotic and bacterial genes. Others have suggested that NCLDVs families originated from cells that underwent an irreversible process of genome reduction. However, the hypothesis that a number of NCLDVs sequences have been recruited from the host genomes has been largely ignored. In the present work, we have performed pangenomic analyses of each of the seven known NCLDVs families. We show that these families' core- and shell genes have cellular homologs, supporting possible escaping-gene events as part of its evolution. 
Furthermore, the detection of sequences that belong to two protein families (small chain ribonucleotide reductase and Erv1/Air) and to one superfamily [2OG-Fe(II) oxygenases] that are for distribution in all NCLDVs core and shell clusters encoding for oxygen-dependent enzymes suggests that the highly conserved core these viruses originated after the Proterozoic Great Oxidation Event that transformed the terrestrial atmosphere 2.4-2.3 Ga ago.}, } @article {pmid37526649, year = {2023}, author = {Rodrigues, JA and Blankenship, HM and Cha, W and Mukherjee, S and Sloup, RE and Rudrik, JT and Soehnlen, M and Manning, SD}, title = {Pangenomic analyses of antibiotic-resistant Campylobacter jejuni reveal unique lineage distributions and epidemiological associations.}, journal = {Microbial genomics}, volume = {9}, number = {8}, pages = {}, pmid = {37526649}, issn = {2057-5858}, support = {U01 CK000510/CK/NCEZID CDC HHS/United States ; U01CK000510/ACL/ACL HHS/United States ; }, mesh = {Animals ; Cattle ; Anti-Bacterial Agents/pharmacology ; *Campylobacter jejuni/genetics ; *Campylobacter Infections/epidemiology ; Phylogeny ; Multilocus Sequence Typing ; }, abstract = {Application of whole-genome sequencing (WGS) to characterize foodborne pathogens has advanced our understanding of circulating genotypes and evolutionary relationships. Herein, we used WGS to investigate the genomic epidemiology of Campylobacter jejuni, a leading cause of foodborne disease. Among the 214 strains recovered from patients with gastroenteritis in Michigan, USA, 85 multilocus sequence types (STs) were represented and 135 (63.1 %) were phenotypically resistant to at least one antibiotic. Horizontally acquired antibiotic resistance genes were detected in 128 (59.8 %) strains and the genotypic resistance profiles were mostly consistent with the phenotypes. 
Core-gene phylogenetic reconstruction identified three sequence clusters that varied in frequency, while a neighbour-net tree detected significant recombination among the genotypes (pairwise homoplasy index P<0.01). Epidemiological analyses revealed that travel was a significant contributor to pangenomic and ST diversity of C. jejuni, while some lineages were unique to rural counties and more commonly possessed clinically important resistance determinants. Variation was also observed in the frequency of lineages over the 4 year period with chicken and cattle specialists predominating. Altogether, these findings highlight the importance of geographically specific factors, recombination and horizontal gene transfer in shaping the population structure of C. jejuni. They also illustrate the usefulness of WGS data for predicting antibiotic susceptibilities and surveillance, which are important for guiding treatment and prevention strategies.}, } @article {pmid37525145, year = {2023}, author = {Safar, HA and Alatar, F and Nasser, K and Al-Ajmi, R and Alfouzan, W and Mustafa, AS}, title = {The impact of applying various de novo assembly and correction tools on the identification of genome characterization, drug resistance, and virulence factors of clinical isolates using ONT sequencing.}, journal = {BMC biotechnology}, volume = {23}, number = {1}, pages = {26}, pmid = {37525145}, issn = {1472-6750}, mesh = {*Virulence Factors/genetics ; Reproducibility of Results ; *Genomics ; Escherichia coli/genetics ; High-Throughput Nucleotide Sequencing ; Drug Resistance ; Sequence Analysis, DNA ; }, abstract = {Oxford Nanopore sequencing technology (ONT) is currently widely used due to its affordability, simplicity, and reliability. Despite the advantage ONT has over next-generation sequencing in detecting resistance genes in mobile genetic elements, its relatively high error rate (10-15%) is still a deterrent. 
Several bioinformatic tools are freely available for raw data processing and obtaining complete and more accurate genome assemblies. In this study, we evaluated the impact of using mix-and-matched read assembly (Flye, Canu, Wtdbg2, and NECAT) and read correction (Medaka, NextPolish, and Racon) tools in generating complete and accurate genome assemblies, and downstream genomic analysis of nine clinical Escherichia coli isolates. Flye and Canu assemblers were the most robust in genome assembly, and Medaka and Racon correction tools significantly improved assembly parameters. Flye functioned well in pan-genome analysis, while Medaka increased the number of core genes detected. Flye, Canu, and NECAT assembler functioned well in detecting antimicrobial resistance genes (AMR), while Wtdbg2 required correction tools for better detection. Flye was the best assembler for detecting and locating both virulence and AMR genes (i.e., chromosomal vs. plasmid). This study provides insight into the performance of several read assembly and read correction tools for analyzing ONT sequencing reads for clinical isolates.}, } @article {pmid37524789, year = {2023}, author = {O'Donnell, S and Yue, JX and Saada, OA and Agier, N and Caradec, C and Cokelaer, T and De Chiara, M and Delmas, S and Dutreux, F and Fournier, T and Friedrich, A and Kornobis, E and Li, J and Miao, Z and Tattini, L and Schacherer, J and Liti, G and Fischer, G}, title = {Telomere-to-telomere assemblies of 142 strains characterize the genome structural landscape in Saccharomyces cerevisiae.}, journal = {Nature genetics}, volume = {55}, number = {8}, pages = {1390--1399}, pmid = {37524789}, issn = {1546-1718}, mesh = {*Saccharomyces cerevisiae/genetics ; Phylogeny ; *Genome ; Genomics ; Telomere/genetics ; }, abstract = {Pangenomes provide access to an accurate representation of the genetic diversity of species, both in terms of sequence polymorphisms and structural variants (SVs). 
Here we generated the Saccharomyces cerevisiae Reference Assembly Panel (ScRAP) comprising reference-quality genomes for 142 strains representing the species' phylogenetic and ecological diversity. The ScRAP includes phased haplotype assemblies for several heterozygous diploid and polyploid isolates. We identified circa (ca.) 4,800 nonredundant SVs that provide a broad view of the genomic diversity, including the dynamics of telomere length and transposable elements. We uncovered frequent cases of complex aneuploidies where large chromosomes underwent large deletions and translocations. We found that SVs can impact gene expression near the breakpoints and substantially contribute to gene repertoire evolution. We also discovered that horizontally acquired regions insert at chromosome ends and can generate new telomeres. Overall, the ScRAP demonstrates the benefit of a pangenome in understanding genome evolution at population scale.}, } @article {pmid37512795, year = {2023}, author = {Jaén-Luchoro, D and Kahnamouei, A and Yazdanshenas, S and Lindblom, A and Samuelsson, E and Åhrén, C and Karami, N}, title = {Comparative Genomic Analysis of ST131 Subclade C2 of ESBL-Producing E. coli Isolates from Patients with Recurrent and Sporadic Urinary Tract Infections.}, journal = {Microorganisms}, volume = {11}, number = {7}, pages = {}, pmid = {37512795}, issn = {2076-2607}, support = {ALFGBG-725361//Region Västra Götaland/ ; VGFOUREG-929979//Region Västra Götaland/ ; 2020-02518//Sahlgrenska University Hospital/ ; }, abstract = {The global emergence of extended-spectrum beta-lactamase-producing Escherichia coli (ESBL-E. coli), mainly causing urinary tract infections (UTI), is a major threat to human health. ESBL-E. coli sequence type (ST) 131 is the dominating clone worldwide, especially its subclade C2. Patients developing recurrent UTI (RUTI) due to ST131 subclade C2 appear to have an increased risk of recurrent infections. 
We have thus compared the whole genome of ST131 subclade C2 isolates from 14 patients with RUTI to those from 14 patients with sporadic UTI (SUTI). We aimed to elucidate if isolates causing RUTI can be associated with specific genomic features. Paired isolates from patients with RUTI were identical, presenting 2-18 single nucleotide polymorphism (SNP) differences for all six patients investigated. Comparative genomic analyses, including virulence factors, antibiotic resistance, pangenome and SNP analyses did not find any pattern associated with isolates causing RUTI. Despite extensive whole genome analyses, an increased risk of recurrences seen in patients with UTI due to ST131 subclade C2 isolates could not be explained by bacterial genetic differences in the two groups of isolates. Hence, additional factors that could aid in identifying bacterial properties contributing to the increased risk of RUTI due to ESBL-E. coli ST131 subclade C2 remains to be explored.}, } @article {pmid37511853, year = {2023}, author = {Panova, VV and Dolinnaya, NG and Novoselov, KA and Savitskaya, VY and Chernykh, IS and Kubareva, EA and Alexeevski, AV and Zvereva, MI}, title = {Conserved G-Quadruplex-Forming Sequences in Mammalian TERT Promoters and Their Effect on Mutation Frequency.}, journal = {Life (Basel, Switzerland)}, volume = {13}, number = {7}, pages = {}, pmid = {37511853}, issn = {2075-1729}, support = {21-14-00161//Russian Science Foundation/ ; }, abstract = {Somatic mutations in the promoter region of the human telomerase reverse transcriptase (hTERT) gene have been identified in many types of cancer. The hTERT promoter is known to be enriched with sequences that enable the formation of G-quadruplex (G4) structures, whose presence is associated with elevated mutagenicity and genome instability. 
Here, we used a bioinformatics tool (QGRS mapper) to search for G4-forming sequences (G4 motifs) in the 1000 bp TERT promoter regions of 141 mammalian species belonging to 20 orders, 5 of which, including primates and predators, contain more than 10 species. Groups of conserved G4 motifs and single-nucleotide variants within these groups were discovered using a block alignment approach (based on the Nucleotide PanGenome explorer). It has been shown that: (i) G4 motifs are predominantly located in the region proximal to the transcription start site (up to 400 bp) and are over-represented on the non-coding strand of the TERT promoters, (ii) 11 to 22% of the G4 motifs found are evolutionarily conserved across the related organisms, and (iii) a statistically significant higher frequency of nucleotide substitutions in the conserved G4 motifs compared to the surrounding regions was confirmed only for the order Primates. These data support the assumption that G4s can interfere with the DNA repair process and affect the evolutionary adaptation of organisms and species.}, } @article {pmid37510288, year = {2023}, author = {Leszczyńska, K and Święcicka, I and Daniluk, T and Lebensztejn, D and Chmielewska-Deptuła, S and Leszczyńska, D and Gawor, J and Kliber, M}, title = {Escherichia albertii as a Potential Enteropathogen in the Light of Epidemiological and Genomic Studies.}, journal = {Genes}, volume = {14}, number = {7}, pages = {}, pmid = {37510288}, issn = {2073-4425}, mesh = {Humans ; Animals ; *Enterobacteriaceae Infections ; *Genome, Bacterial ; Polymorphism, Restriction Fragment Length ; Computational Biology ; Phylogeny ; }, abstract = {Escherichia albertii is a new enteropathogen of humans and animals. The aim of the study was to assess the prevalence and pathogenicity of E. albertii strains isolated in northeastern Poland using epidemiological and genomic studies. 
In 2015-2018, a total of 1154 fecal samples from children and adults, 497 bird droppings, 212 food samples, 92 water samples, and 500 lactose-negative E. coli strains were tested. A total of 42 E. albertii strains were isolated. The PCR method was suitable for their rapid identification. In total, 33.3% of E. albertii isolates were resistant to one antibiotic, and 16.7% to two. Isolates were sensitive to cefepime, imipenem, levofloxacin, gentamicin, trimethoprim/sulfamethoxazole, and did not produce ESBL β-lactamases. High genetic variability of E. albertii has been demonstrated. In the PFGE method, 90.5% of the strains had distinct pulsotypes. In MLST typing, 85.7% of strains were assigned distinct sequence types (STs), of which 64% were novel ST types. Cytolethal distending toxin (CDT) and Paa toxin genes were found in 100% of E. albertii isolates. Genes encoding toxins, IbeA, CdtB type 2, Tsh and Shiga (Stx2f), were found in 26.2%, 9.7%, 1.7%, and 0.4% of E. albertii isolates, respectively. The chromosome size of the tested strains ranged from 4,573,338 to 5,141,010 bp (average 4,784,003 bp), and at least one plasmid was present in all strains. The study contributes to a more accurate assessment of the genetic diversity of E. albertii and the potential threat it poses to public health.}, } @article {pmid37503282, year = {2023}, author = {Joglekar, P and Conlan, S and Lee-Lin, SQ and Deming, C and Kashaf, SS and Kong, HH and Segre, JA}, title = {Integrated genomic and functional analyses of human skin-associated Staphylococcus reveals extensive inter- and intra-species diversity.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37503282}, abstract = {UNLABELLED: Human skin is stably colonized by a distinct microbiota that functions together with epidermal cells to maintain a protective physical barrier. 
Staphylococcus, a prominent genus of the skin microbiota, participates in colonization resistance, tissue repair, and host immune regulation in strain specific manners. To unlock the potential of engineering skin microbial communities, we aim to fully characterize the functional diversity of this genus within the context of the skin environment. We conducted metagenome and pan-genome analyses of isolates obtained from distinct body sites of healthy volunteers, providing a detailed biogeographic depiction of staphylococcal species that colonize our skin. S. epidermidis, S. capitis, and S. hominis were the most abundant species present in all volunteers and were detected at all body sites. Pan-genome analysis of these three species revealed that the genus-core was dominated by central metabolism genes. Species-specific core genes were enriched in host colonization functions. The majority (∼68%) of genes were detected only in a fraction of isolate genomes, underscoring the immense strain-specific gene diversity. Conspecific genomes grouped into phylogenetic clades, exhibiting body site preference. Each clade was enriched for distinct gene-sets that are potentially involved in site tropism. Finally, we conducted gene expression studies of select isolates showing variable growth phenotypes in skin-like medium. In vitro expression revealed extensive intra- and inter-species gene expression variation, substantially expanding the functional diversification within each species. Our study provides an important resource for future ecological and translational studies to examine the role of shared and strain-specific staphylococcal genes within the skin environment.

SIGNIFICANCE: The bacterial genus Staphylococcus is a prominent member of the human skin microbiome, performing important and diverse functions such as tuning immunity, driving tissue repair, and preventing pathogen colonization. Each of these functions is carried out by a subset of staphylococcal strains, displaying differences in gene content and regulation. Delineating the genomic and functional diversity of Staphylococcus will enable researchers to unlock the potential of engineering skin communities to promote health. Here, we present a comprehensive multi-omics analysis to characterize the inter- and intra-species diversity present in human skin-associated staphylococci. Our study is the first to conduct a detailed pan-genome comparison between prominent skin staphylococcal species giving a valuable insight into gene sharing and provides an important resource.}, } @article {pmid37502876, year = {2023}, author = {Ahmed, NM and Joglekar, P and Deming, C and Lemon, KP and Kong, HH and Segre, JA and Conlan, S}, title = {Genomic characterization of the C. tuberculostearicum species complex, a ubiquitous member of the human skin microbiome.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37502876}, abstract = {UNLABELLED: Corynebacterium is a predominant genus in the skin microbiome, yet its genetic diversity on skin is incompletely characterized and lacks a comprehensive set of reference genomes. Our work aims to investigate the distribution of Corynebacterium species on the skin, as well as to expand the existing genome reference catalog to enable more complete characterization of skin metagenomes. We used V1-V3 16S rRNA gene sequencing data from 14 body sites of 23 healthy volunteers to characterize Corynebacterium diversity and distribution across healthy human skin. Corynebacterium tuberculostearicum is the predominant species found on human skin and we identified two distinct C. 
tuberculostearicum ribotypes (A & B) that can be distinguished by variation in the 16S rRNA V1-V3 sequence. One is distributed across all body sites and the other found primarily on the feet. We performed whole genome sequencing of 40 C. tuberculostearicum isolates cultured from the skin of five healthy individuals across seven skin sites. We generated five closed genomes of diverse C. tuberculostearicum which revealed that C. tuberculostearicum isolates are largely syntenic and carry a diversity of methylation patterns, plasmids and CRISPR/Cas systems. The pangenome of C. tuberculostearicum is open with a core genome size of 1806 genes and a pangenome size of 5451 total genes. This expanded pangenome enabled the mapping of 24% more C. tuberculostearicum reads from shotgun metagenomic datasets derived from skin body sites. Finally, while the genomes from this study all fall within a C. tuberculostearicum species complex, the ribotype B isolates may constitute a new species.

IMPORTANCE: Amplicon sequencing data combined with isolate whole genome sequencing has expanded our understanding of Corynebacterium on the skin. Human skin is characterized by a diverse collection of Corynebacterium species but C. tuberculostearicum predominates many sites. Our work supports the emerging idea that C. tuberculostearicum is a species complex encompassing several distinct species. We produced a collection of genomes that help define this complex including a potentially new species which we are calling C. hallux based on a preference for sites on the feet, whole-genome average nucleotide identity, pangenomics and growth in skin-like media. This isolate collection and high-quality genome resource sets the stage for developing engineered strains for both basic and translational clinical studies. Microbiomes are shaped by taxa that are both characteristic to those sites and functionally important to that community. The genus Corynebacterium is one such taxa for the human skin and nares. Foundational studies using 16S rRNA gene sequencing and shotgun metagenomics by our lab (1, 2) and others (3) have established Corynebacterium as common members of the skin microbiome. While Corynebacterium have been positively correlated with the resolution of dysbiosis associated with eczema flares (4), the importance of the Corynebacterium spp. is less defined for skin disease severity in primary immune deficient patients (5, 6). Corynebacterium spp. are predominant members of the human aerodigestive tract microbiome (nares, oral cavity and respiratory tract) (3) and participate in microbe-microbe interactions with members of nasal microbiome (7, 8). Corynebacterium have been shown to engage with the host immune system, specifically C. accolens-promoted IL23-dependent inflammation in mice on a high-fat diet (9). C. bovis and C. 
mastitidis have been shown to predominate the microbiome of an ADAM10-deficient mouse model (10) as well as an ADAM17-deficient mouse model of eczema (11). Finally, C. tuberculostearicum has been shown to induce inflammation in human epidermal keratinocyte cell cultures (12). These studies establish Corynebacterium spp. as key members of the skin microbiome capable of both microbe-microbe and microbe-host interactions. A critical resource for understanding the biology of Corynebacterium on the skin is a robust collection of complete reference genomes, including isolates collected from a variety of individuals and body sites. Previously published genome collections from skin- or nares-resident species include Staphylococcus epidermidis (13), Cutibacterium acnes (14) and the recent comparative analysis of Dolosigranulum pigrum (15). Of note, while emerging bioinformatic methods and pipelines are now being employed to extract nearly-complete genomes (MAGs) from metagenomic assemblies of skin samples (16), MAGs are not yet a substitute for genomes from cultured isolates to understand strain level or pangenomic diversity. In addition to functional prediction, comparative genomics is increasingly being used to augment conventional microbiological methods to define or redefine taxonomic boundaries (17, 18), as well as describe the full extent of diversity within these boundaries (19). A pangenome, which encompasses the complete set of genes present within a set of genome sequences, enables the characterization of gene-level heterogeneity within a taxonomic group. The pangenome is commonly subdivided into the 'core' genome, referring to genes present in all strains, and the 'accessory' or 'dispensable' genome, referring to those present in only one or some isolates. (The accessory pangenome can be further subdivided to reflect a wider range of gene uniqueness, e.g. singletons.) 
Thorough characterization of taxa is limited by the availability of representative and high-quality genome assemblies. Unfortunately, with the exceptions of clinically relevant Corynebacterium spp. (e.g., C. diphtheriae, C. striatum and C. pseudotuberculosis), the genus is inadequately sequenced, with 75% of species having fewer than six genomes. This includes common skin-associated species like C. tuberculostearicum with just five unique isolate genomes, only two of which are from skin. This work seeks first to characterize the distribution of Corynebacterium across 14 skin sites from 23 healthy volunteers. The second goal of this work focuses on what we identify as the predominant skin Corynebacterium species, C. tuberculostearicum. We have sequenced 23 distinct C. tuberculostearicum strains (n=40 genomes before dereplication), a five-fold increase in the number of publicly available, unique genomes (n=5). In addition to short-read assemblies, we generated five complete genomes which, along with the type strain (DSM44922), demonstrate that C. tuberculostearicum genomes are largely syntenic and carry a number of methylation systems as well as a CRISPR/Cas system. Genes from the C. tuberculostearicum genomes in our collection fall into 5451 gene clusters comprising the species pangenome. This expanded pangenome, as compared to existing public references, improved the mapping of C. tuberculostearicum metagenomic reads from unrelated healthy volunteers. In addition, we have identified a distinct C. 
tuberculostearicum clade that is highly enriched on the feet that may represent a new species, tentatively designated Corynebacterium hallux.}, } @article {pmid37497030, year = {2023}, author = {Price, C and Russell, JA}, title = {AMAnD: an automated metagenome anomaly detection methodology utilizing DeepSVDD neural networks.}, journal = {Frontiers in public health}, volume = {11}, number = {}, pages = {1181911}, pmid = {37497030}, issn = {2296-2565}, mesh = {Humans ; *Metagenome ; *COVID-19/genetics ; Neural Networks, Computer ; Genomics ; Metagenomics/methods ; }, abstract = {The composition of metagenomic communities within the human body often reflects localized medical conditions such as upper respiratory diseases and gastrointestinal diseases. Fast and accurate computational tools to flag anomalous metagenomic samples from typical samples are desirable to understand different phenotypes, especially in contexts where repeated, long-duration temporal sampling is done. Here, we present Automated Metagenome Anomaly Detection (AMAnD), which utilizes two types of Deep Support Vector Data Description (DeepSVDD) models; one trained on taxonomic feature space output by the Pan-Genomics for Infectious Agents (PanGIA) taxonomy classifier and one trained on kmer frequency counts. AMAnD's semi-supervised one-class approach makes no assumptions about what an anomaly may look like, allowing the flagging of potentially novel anomaly types. Three diverse datasets are profiled. The first dataset is hosted on the National Center for Biotechnology Information's (NCBI) Sequence Read Archive (SRA) and contains nasopharyngeal swabs from healthy and COVID-19-positive patients. The second dataset is also hosted on SRA and contains gut microbiome samples from normal controls and from patients with slow transit constipation (STC). AMAnD can learn a typical healthy nasopharyngeal or gut microbiome profile and reliably flag the anomalous COVID+ or STC samples in both feature spaces. 
The final dataset is a synthetic metagenome created by the Critical Assessment of Metagenome Annotation Simulator (CAMISIM). A control dataset of 50 well-characterized organisms was submitted to CAMISIM to generate 100 synthetic control class samples. The experimental conditions included 12 different spiked-in contaminants that are taxonomically similar to organisms present in the laboratory blank sample ranging from one strain tree branch taxonomic distance away to one family tree branch taxonomic distance away. This experiment was repeated in triplicate at three different coverage levels to probe the dependence on sample coverage. AMAnD was again able to flag the contaminant inserts as anomalous. AMAnD's assumption-free flagging of metagenomic anomalies, the real-time model training update potential of the deep learning approach, and the strong performance even with lightweight models of low sample cardinality would make AMAnD well-suited to a wide array of applied metagenomics biosurveillance use-cases, from environmental to clinical utility.}, } @article {pmid37494467, year = {2023}, author = {Ma, J and Cáceres, M and Salmela, L and Mäkinen, V and Tomescu, AI}, title = {Chaining for accurate alignment of erroneous long reads to acyclic variation graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {8}, pages = {}, pmid = {37494467}, issn = {1367-4811}, mesh = {Humans ; Sequence Analysis, DNA ; *High-Throughput Nucleotide Sequencing ; *Algorithms ; Sequence Alignment ; Computational Biology ; Software ; }, abstract = {MOTIVATION: Aligning reads to a variation graph is a standard task in pangenomics, with downstream applications such as improving variant calling. While the vg toolkit [Garrison et al. (Variation graph toolkit improves read mapping by representing genetic variation in the reference. 
Nat Biotechnol 2018;36:875-9)] is a popular aligner of short reads, GraphAligner [Rautiainen and Marschall (GraphAligner: rapid and versatile sequence-to-graph alignment. Genome Biol 2020;21:253-28)] is the state-of-the-art aligner of erroneous long reads. GraphAligner works by finding candidate read occurrences based on individually extending the best seeds of the read in the variation graph. However, a more principled approach recognized in the community is to co-linearly chain multiple seeds.

RESULTS: We present a new algorithm to co-linearly chain a set of seeds in a string labeled acyclic graph, together with the first efficient implementation of such a co-linear chaining algorithm into a new aligner of erroneous long reads to acyclic variation graphs, GraphChainer. We run experiments aligning real and simulated PacBio CLR reads with average error rates 15% and 5%. Compared to GraphAligner, GraphChainer aligns 12-17% more reads, and 21-28% more total read length, on real PacBio CLR reads from human chromosomes 1, 22, and the whole human pangenome. On both simulated and real data, GraphChainer aligns between 95% and 99% of all reads, and of total read length. We also show that minigraph [Li et al. (The design and construction of reference pangenome graphs with minigraph. Genome Biol 2020;21:265-19.)] and minichain [Chandra and Jain (Sequence to graph alignment using gap-sensitive co-linear chaining. In: Proceedings of the 27th Annual International Conference on Research in Computational Molecular Biology (RECOMB 2023). Springer, 2023, 58-73.)] obtain an accuracy of <60% on this setting.

GraphChainer is freely available at https://github.com/algbio/GraphChainer. The datasets and evaluation pipeline can be reached from the previous address.}, } @article {pmid37492100, year = {2023}, author = {Frazer, KA and Schork, NJ}, title = {The human pangenome reference anticipates equitable and fundamental genomic insights.}, journal = {Cell genomics}, volume = {3}, number = {7}, pages = {100360}, pmid = {37492100}, issn = {2666-979X}, abstract = {For the past few years, researchers in the Human Pangenome Reference Consortium (HPRC) have been working to catalog almost all human genomic diversity. Frazer and Schork preview an article recently published in Nature, "A draft human pangenome reference,"[1] which represents the initial release of 47 fully phased diploid assemblies of genomes of individuals with diverse ancestries.}, } @article {pmid37491415, year = {2023}, author = {Matrishin, CB and Haase, EM and Dewhirst, FE and Mark Welch, JL and Miranda-Sanchez, F and Chen, T and MacFarland, DC and Kauffman, KM}, title = {Phages are unrecognized players in the ecology of the oral pathogen Porphyromonas gingivalis.}, journal = {Microbiome}, volume = {11}, number = {1}, pages = {161}, pmid = {37491415}, issn = {2049-2618}, support = {T32DE023526/DE/NIDCR NIH HHS/United States ; R01DE016937/DE/NIDCR NIH HHS/United States ; R03 DE030987/DE/NIDCR NIH HHS/United States ; }, mesh = {Humans ; *Bacteriophages/genetics ; Porphyromonas gingivalis/genetics ; Prophages/genetics ; *Periodontal Diseases ; Base Sequence ; }, abstract = {BACKGROUND: Porphyromonas gingivalis (hereafter "Pg") is an oral pathogen that has been hypothesized to act as a keystone driver of inflammation and periodontal disease. Although Pg is most readily recovered from individuals with actively progressing periodontal disease, healthy individuals and those with stable non-progressing disease are also colonized by Pg. 
Insights into the factors shaping the striking strain-level variation in Pg, and its variable associations with disease, are needed to achieve a more mechanistic understanding of periodontal disease and its progression. One of the key forces often shaping strain-level diversity in microbial communities is infection of bacteria by their viral (phage) predators and symbionts. Surprisingly, although Pg has been the subject of study for over 40 years, essentially nothing is known of its phages, and the prevailing paradigm is that phages are not important in the ecology of Pg.

RESULTS: Here we systematically addressed the question of whether Pg are infected by phages—and we found that they are. We found that prophages are common in Pg, they are genomically diverse, and they encode genes that have the potential to alter Pg physiology and interactions. We found that phages represent unrecognized targets of the prevalent CRISPR-Cas defense systems in Pg, and that Pg strains encode numerous additional mechanistically diverse candidate anti-phage defense systems. We also found that phages and candidate anti-phage defense system elements together are major contributors to strain-level diversity and the species pangenome of this oral pathogen. Finally, we demonstrate that prophages harbored by a model Pg strain are active in culture, producing extracellular viral particles in broth cultures.

CONCLUSION: This work definitively establishes that phages are a major unrecognized force shaping the ecology and intra-species strain-level diversity of the well-studied oral pathogen Pg. The foundational phage sequence datasets and model systems that we establish here add to the rich context of all that is already known about Pg, and point to numerous avenues of future inquiry that promise to shed new light on fundamental features of phage impacts on human health and disease broadly. Video Abstract.}, } @article {pmid37491393, year = {2023}, author = {Cho, MK and Fullerton, SM and Hammonds, EM and Lee, SS and Panofsky, A and Reardon, J}, title = {Pangenomics: prioritize diversity in collaborations.}, journal = {Nature}, volume = {619}, number = {7971}, pages = {698}, doi = {10.1038/d41586-023-02248-7}, pmid = {37491393}, issn = {1476-4687}, mesh = {Humans ; *Genomics/methods/trends ; *Health Equity ; *Intersectoral Collaboration ; }, } @article {pmid37490004, year = {2023}, author = {Wu, S and Sun, H and Gao, L and Branham, S and McGregor, C and Renner, SS and Xu, Y and Kousik, C and Wechter, WP and Levi, A and Fei, Z}, title = {A Citrullus genus super-pangenome reveals extensive variations in wild and cultivated watermelons and sheds light on watermelon evolution and domestication.}, journal = {Plant biotechnology journal}, volume = {21}, number = {10}, pages = {1926-1928}, pmid = {37490004}, issn = {1467-7652}, support = {2015-51181-24285//National Institute of Food and Agriculture/ ; 2020-51181-32139//National Institute of Food and Agriculture/ ; 1855585//National Science Foundation/ ; }, mesh = {*Citrullus/genetics ; Domestication ; Genome, Plant/genetics ; Polymorphism, Single Nucleotide ; }, } @article {pmid37487084, year = {2023}, author = {Bozan, I and Achakkagari, SR and Anglin, NL and Ellis, D and Tai, HH and Strömvik, MV}, title = {Pangenome analyses reveal impact of transposable elements and ploidy on the evolution of potato species.}, journal = 
{Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {31}, pages = {e2211117120}, pmid = {37487084}, issn = {1091-6490}, mesh = {*Solanum tuberosum ; DNA Transposable Elements ; Phylogeny ; Ploidies ; *Solanum ; }, abstract = {Potato (Solanum sp., family Solanaceae) is the most important noncereal food crop globally. It has over 100 wild relatives in the Solanum section Petota, which features species with both sexual and asexual reproduction and varying ploidy levels. A pangenome of Solanum section Petota composed of 296 accessions was constructed including diploids and polyploids compared via presence/absence variation (PAV). The Petota core (genes shared by at least 97% of the accessions) and shell genomes (shared by 3 to 97%) are enriched in basic molecular and cellular functions, while the cloud genome (genes present in less than 3% of the member accessions) showed enrichment in transposable elements (TEs). Comparison of PAV in domesticated vs. wild accessions was made, and a phylogenetic tree was constructed based on PAVs, grouping accessions into different clades, similar to previous phylogenies produced using DNA markers. A cladewise pangenome approach identified abiotic stress response among the core genes in clade 1+2 and clade 3, and flowering/tuberization among the core genes in clade 4. The TE content differed between the clades, with clade 1+2, which is composed of species from North and Central America with reproductive isolation from species in other clades, having much lower TE content compared to other clades. In contrast, accessions with in vitro propagation history were identified and found to have high levels of TEs. 
Results indicate a role for TEs in adaptation to new environments, both natural and artificial, for Solanum section Petota.}, } @article {pmid37485508, year = {2023}, author = {Liu, W and Ou, P and Tian, F and Liao, J and Ma, Y and Wang, J and Jin, X}, title = {Anti-Vibrio parahaemolyticus compounds from Streptomyces parvus based on Pan-genome and subtractive proteomics.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1218176}, pmid = {37485508}, issn = {1664-302X}, abstract = {INTRODUCTION: Vibrio parahaemolyticus is a foodborne pathogen commonly found in seafood, and drug resistance poses significant challenges to its control. This study aimed to identify novel drug targets for antibacterial drug discovery.

METHODS: To identify drug targets, we performed a pan-genome analysis on 58 strains of V. parahaemolyticus genomes to obtain core genes. Subsequently, subtractive proteomics and physiochemical checks were conducted on the core proteins to identify potential therapeutic targets. Molecular docking was then employed to screen for anti-V. parahaemolyticus compounds using an in-house compound library of Streptomyces parvus, chosen based on binding energy. The anti-V. parahaemolyticus efficacy of the identified compounds was further validated through a series of experimental tests.

RESULTS AND DISCUSSION: Pangenome analysis of 58 V. parahaemolyticus genomes revealed that there were 1,392 core genes. After Subtractive proteomics and physiochemical checks, Flagellar motor switch protein FliN was selected as a therapeutic target against V. parahaemolyticus. FliN was modeled and docked with Streptomyces parvus source compounds, and Actinomycin D was identified as a potential anti-V. parahaemolyticus agent with a strong binding energy. Experimental verification confirmed its effectiveness in killing V. parahaemolyticus and significantly inhibiting biofilm formation and motility. This study is the first to use pan-genome and subtractive proteomics to identify new antimicrobial targets for V. parahaemolyticus and to identify the anti-V. parahaemolyticus effect of Actinomycin D. These findings suggest potential avenues for the development of new antibacterial drugs to control V. parahaemolyticus infections.}, } @article {pmid37480395, year = {2023}, author = {Tanuku, SNR and Pinnaka, AK and Behera, S and Singh, A and Pydi, S and Vasudeva, G and Vaidya, B and Sharma, G and Ganta, SK and Garbhapu, NS}, title = {Marinobacterium lacunae sp. nov. isolated from estuarine sediment.}, journal = {Archives of microbiology}, volume = {205}, number = {8}, pages = {294}, pmid = {37480395}, issn = {1432-072X}, support = {GAP3195//Ministry of Earth Sciences/ ; }, mesh = {Phylogeny ; RNA, Ribosomal, 16S/genetics ; Agar ; *Alteromonadaceae ; Cardiolipins ; }, abstract = {A novel motile bacterium was isolated from a sediment sample collected in Kochi backwaters, Kerala, India. This bacterium is Gram negative, rod shaped, 1.0-1.5 µm wide, and 2.0-3.0 µm long. It was designated as strain AK27[T]. Colonies were grown on marine agar displayed circular, off-white, shiny, moist, translucent, flat, margin entire, 1-2 mm in diameter. The major fatty acids identified in this strain were C18:1 ω7c, C16:0, and summed in feature 3. 
The composition of polar lipids in the strain AK27[T] included phosphatidylglycerol, phosphatidylethanolamine, diphosphatidylglycerol, one unidentified amino lipid, two unidentified aminophospholipids, two unidentified phospholipids, and six unidentified lipids. The genomic DNA of strain AK27[T] exhibited a G+C content of 56.4 mol%. Based on the analysis of 16S rRNA gene sequence, strain AK27[T] showed sequence similarity to M. ramblicola D7[T] and M. zhoushanense WM3[T] as 98.99% and 98.58%, respectively. Compared to other type strains of the Marinobacterium genus, strain AK27[T] exhibited sequence similarities ranging from 91.7% to 96.4%. When compared to Marinobacterium zhoushanense WM3[T] and Marinobacterium ramblicola D7[T], strain AK27[T] exhibited average nucleotide identity values of 80.25% and 79.97%, and dDDH values of 22.9% and 22.6%, respectively. The genome size of the strain AK27[T] was 4.55 Mb, with 4,229 coding sequences. Based on the observed phenotypic and chemotaxonomic features, and the results of phylogenetic and phylogenomic analysis, this study proposes the classification of strain AK27[T] as a novel species within the genus Marinobacterium. The proposed name for this novel species is Marinobacterium lacunae sp. 
nov.}, } @article {pmid37477947, year = {2023}, author = {Lyu, X and Xia, Y and Wang, C and Zhang, K and Deng, G and Shen, Q and Gao, W and Zhang, M and Liao, N and Ling, J and Bo, Y and Hu, Z and Yang, J and Zhang, M}, title = {Pan-genome analysis sheds light on structural variation-based dissection of agronomic traits in melon crops.}, journal = {Plant physiology}, volume = {193}, number = {2}, pages = {1330-1348}, doi = {10.1093/plphys/kiad405}, pmid = {37477947}, issn = {1532-2548}, support = {2018YFD1000800//National Key Research and Development Program of China/ ; CARS-25-17//Earmarked Fund for China Agriculture Research System/ ; 2021R51007//Special Support Plan for high-level talents of Zhejiang Province/ ; 2021Z057//Major science and technology project of Ningbo City/ ; }, mesh = {Chromosome Mapping ; *Cucurbitaceae/genetics/metabolism ; Genome-Wide Association Study ; Plant Breeding ; Genes, Plant ; *Cucumis melo/genetics ; Fruit/genetics/metabolism ; }, abstract = {Sweetness and appearance of fresh fruits are key palatable and preference attributes for consumers and are often controlled by multiple genes. However, fine-mapping the key loci or genes of interest by single genome-based genetic analysis is challenging. Herein, we present the chromosome-level genome assembly of 1 landrace melon accession (Cucumis melo ssp. agrestis) with wild morphologic features and thus construct a melon pan-genome atlas via integrating sequenced melon genome datasets. Our comparative genomic analysis reveals a total of 3.4 million genetic variations, of which the presence/absence variations (PAVs) are mainly involved in regulating the function of genes for sucrose metabolism during melon domestication and improvement. We further resolved several loci that are accountable for sucrose contents, flesh color, rind stripe, and suture using a structural variation (SV)-based genome-wide association study. 
Furthermore, via bulked segregation analysis (BSA)-seq and map-based cloning, we uncovered that a single gene, (CmPIRL6), determines the edible or inedible characteristics of melon fruit exocarp. These findings provide important melon pan-genome information and provide a powerful toolkit for future pan-genome-informed cultivar breeding of melon.}, } @article {pmid37476668, year = {2023}, author = {Agarwal, V and Stubits, R and Nassrullah, Z and Dillon, MM}, title = {Pangenome insights into the diversification and disease specificity of worldwide Xanthomonas outbreaks.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1213261}, pmid = {37476668}, issn = {1664-302X}, abstract = {The bacterial genus Xanthomonas is responsible for disease outbreaks in several hundred plant species, many of them economically important crops. In the era of next-generation sequencing, thousands of strains from this genus have now been sequenced as part of isolated studies that focus on outbreak characterization, host range, diversity, and virulence factor identification. However, these data have not been synthesized and we lack a comprehensive phylogeny for the genus, with some species designations in public databases still relying on phenotypic similarities and representative sequence typing. The extent of genetic cohesiveness among Xanthomonas strains, the distribution of virulence factors across strains, and the impact of evolutionary history on host range across the genus are also poorly understood. In this study, we present a pangenome analysis of 1,910 diverse Xanthomonas genomes, highlighting their evolutionary relationships, the distribution of virulence-associated genes across strains, and rates of horizontal gene transfer. We find a number of broadly conserved classes of virulence factors and considerable diversity in the Type 3 Secretion Systems (T3SSs) and Type 3 Secreted Effector (T3SE) repertoires of different Xanthomonas species. 
We also use these data to re-assign incorrectly classified strains to phylogenetically informed species designations and find evidence of both monophyletic host specificity and convergent evolution of phylogenetically distant strains to the same host. Finally, we explore the role of recombination in maintaining genetic cohesion within the Xanthomonas genus as a result of both ancestral and recent recombination events. Understanding the evolutionary history of Xanthomonas species and the relationship of key virulence factors with host-specificity provides valuable insight into the mechanisms through which Xanthomonas species shift between hosts and will enable us to develop more robust resistance strategies against these highly virulent pathogens.}, } @article {pmid37474912, year = {2023}, author = {Ortega-Sanz, I and Barbero-Aparicio, JA and Canepa-Oneto, A and Rovira, J and Melero, B}, title = {CamPype: an open-source workflow for automated bacterial whole-genome sequencing analysis focused on Campylobacter.}, journal = {BMC bioinformatics}, volume = {24}, number = {1}, pages = {291}, pmid = {37474912}, issn = {1471-2105}, support = {LCF/PR/PR18/51130007//"la Caixa" Foundation/ ; }, mesh = {*Campylobacter/genetics ; Genome, Bacterial ; Workflow ; Bacteria/genetics ; Genomics ; }, abstract = {BACKGROUND: The rapid expansion of Whole-Genome Sequencing has revolutionized the fields of clinical and food microbiology. However, its implementation as a routine laboratory technique remains challenging due to the growth of data at a faster rate than can be effectively analyzed and critical gaps in bioinformatics knowledge.

RESULTS: To address both issues, CamPype was developed as a new bioinformatics workflow for the genomics analysis of sequencing data of bacteria, especially Campylobacter, which is the main cause of gastroenteritis worldwide making a negative impact on the economy of the public health systems. CamPype allows full customization of stages to run and tools to use, including read quality control filtering, read contamination, reads extension and assembly, bacterial typing, genome annotation, searching for antibiotic resistance genes, virulence genes and plasmids, pangenome construction and identification of nucleotide variants. All results are processed and summarized in an interactive HTML report for best data visualization and interpretation.

CONCLUSIONS: The minimal user intervention of CamPype makes of this workflow an attractive resource for microbiology laboratories with no expertise in bioinformatics as a first line method for bacterial typing and epidemiological analyses, that would help to reduce the costs of disease outbreaks, or for comparative genomic analyses. CamPype is publicly available at https://github.com/JoseBarbero/CamPype .}, } @article {pmid37474911, year = {2023}, author = {Huff, M and Hulse-Kemp, AM and Scheffler, BE and Youngblood, RC and Simpson, SA and Babiker, E and Staton, M}, title = {Long-read, chromosome-scale assembly of Vitis rotundifolia cv. Carlos and its unique resistance to Xylella fastidiosa subsp. fastidiosa.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {409}, pmid = {37474911}, issn = {1471-2164}, support = {6062-21000-010-013//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-000-D//USDA-ARS/ ; 6062-21000-010-013//USDA-ARS/ ; }, mesh = {*Vitis/microbiology ; Disease Resistance/genetics ; *Xylella/genetics ; Chromosomes ; Plant Diseases/genetics/microbiology ; }, abstract = {BACKGROUND: Muscadine grape (Vitis rotundifolia) is resistant to many of the pathogens that negatively impact the production of common grape (V. vinifera), including the bacterial pathogen Xylella fastidiosa subsp. fastidiosa (Xfsf), which causes Pierce's Disease (PD). Previous studies in common grape have indicated Xfsf delays host immune response with a complex O-chain antigen produced by the wzy gene. Muscadine cultivars range from tolerant to completely resistant to Xfsf, but the mechanism is unknown.

RESULTS: We assembled and annotated a new, long-read genome assembly for 'Carlos', a cultivar of muscadine that exhibits tolerance, to build upon the existing genetic resources available for muscadine. We used these resources to construct an initial pan-genome for three cultivars of muscadine and one cultivar of common grape. This pan-genome contains a total of 34,970 synteny-constrained entries containing genes of similar structure. Comparison of resistance gene content between the 'Carlos' and common grape genomes indicates an expansion of resistance (R) genes in 'Carlos.' We further identified genes involved in Xfsf response by transcriptome sequencing 'Carlos' plants inoculated with Xfsf. We observed 234 differentially expressed genes with functions related to lipid catabolism, oxidation-reduction signaling, and abscisic acid (ABA) signaling as well as seven R genes. Leveraging public data from previous experiments of common grape inoculated with Xfsf, we determined that most differentially expressed genes in the muscadine response were not found in common grape, and three of the R genes identified as differentially expressed in muscadine do not have an ortholog in the common grape genome.

CONCLUSIONS: Our results support the utility of a pan-genome approach to identify candidate genes for traits of interest, particularly disease resistance to Xfsf, within and between muscadine and common grape.}, } @article {pmid37465028, year = {2023}, author = {Thieringer, PH and Boyd, ES and Templeton, AS and Spear, JR}, title = {Metapangenomic investigation provides insight into niche differentiation of methanogenic populations from the subsurface serpentinizing environment, Samail Ophiolite, Oman.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1205558}, pmid = {37465028}, issn = {1664-302X}, abstract = {Serpentinization reactions produce highly reduced waters that have hyperalkaline pH and that can have high concentrations of H2 and CH4. Putatively autotrophic methanogenic archaea have been identified in the subsurface waters of the Samail Ophiolite, Sultanate of Oman, though the strategies to overcome hyperalkaline pH and dissolved inorganic carbon limitation remain to be fully understood. Here, we recovered metagenome assembled genomes (MAGs) and applied a metapangenomic approach to three different Methanobacterium populations to assess habitat-specific functional gene distribution. A Type I population was identified in the fluids with neutral pH, while a Type II and "Mixed" population were identified in the most hyperalkaline fluids (pH 11.63). The core genome of all Methanobacterium populations highlighted potential DNA scavenging techniques to overcome phosphate or nitrogen limitation induced by environmental conditions. With particular emphasis on the Mixed and Type II population found in the most hyperalkaline fluids, the accessory genomes unique to each population reflected adaptation mechanisms suggesting lifestyles that minimize niche overlap. 
In addition to previously reported metabolic capability to utilize formate as an electron donor and generate intracellular CO2, the Type II population possessed genes relevant to defense against antimicrobials and assimilating potential osmoprotectants to provide cellular stability. The accessory genome of the Mixed population was enriched in genes for multiple glycosyltransferases suggesting reduced energetic costs by adhering to mineral surfaces or to other microorganisms, and fostering a non-motile lifestyle. These results highlight the niche differentiation of distinct Methanobacterium populations to circumvent the challenges of serpentinization impacted fluids through coexistence strategies, supporting our ability to understand controls on methanogenic lifestyles and adaptations within the serpentinizing subsurface fluids of the Samail Ophiolite.}, } @article {pmid37464310, year = {2023}, author = {Singh, RP and Kumari, K and Sharma, PK and Ma, Y}, title = {Characterization and in-depth genome analysis of a halotolerant probiotic bacterium Paenibacillus sp. S-12, a multifarious bacterium isolated from Rauvolfia serpentina.}, journal = {BMC microbiology}, volume = {23}, number = {1}, pages = {192}, pmid = {37464310}, issn = {1471-2180}, mesh = {*Rauwolfia/genetics ; *Paenibacillus/genetics ; Base Composition ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; DNA, Bacterial/genetics ; Sequence Analysis, DNA ; Fatty Acids ; Soil Microbiology ; }, abstract = {BACKGROUND: Members of Paenibacillus genus from diverse habitats have attracted great attention due to their multifarious properties. Considering that members of this genus are mostly free-living in soil, we characterized the genome of a halotolerant environmental isolate belonging to the genus Paenibacillus. The genome mining unravelled the presence of CAZymes, probiotic, and stress-protected genes that suggested strain S-12 for industrial and agricultural purposes.

RESULTS: Molecular identification by 16S rRNA gene sequencing showed its closest match to other Paenibacillus species. The complete genome size of S-12 was 5.69 Mb, with a GC content of 46.5%. The genome analysis of S-12 unravelled the presence of an open reading frame (ORF) encoding the functions related to environmental stress tolerance, adhesion processes, multidrug efflux systems, and heavy metal resistance. Genome annotation identified the various genes for chemotaxis, flagellar motility, and biofilm production, illustrating its strong colonization ability.

CONCLUSION: The current findings provide an in-depth investigation of a probiotic Paenibacillus bacterium that possessed various genome features that enable the bacterium to survive under diverse conditions. The strain shows a strong ability for probiotic application purposes.}, } @article {pmid37461539, year = {2023}, author = {Steenwyk, JL and Knowles, S and Bastos, RW and Balamurugan, C and Rinker, D and Mead, ME and Roberts, CD and Raja, HA and Li, Y and Colabardini, AC and de Castro, PA and Dos Reis, TF and Canóvas, D and Sanchez, RL and Lagrou, K and Torrado, E and Rodrigues, F and Oberlies, NH and Zhou, X and Goldman, GH and Rokas, A}, title = {Evolutionary origin, population diversity, and diagnostics for a cryptic hybrid pathogen.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37461539}, abstract = {Cryptic fungal pathogens pose significant identification and disease management challenges due to their morphological resemblance to known pathogenic species while harboring genetic and (often) infection-relevant trait differences. The cryptic fungal pathogen Aspergillus latus , an allodiploid hybrid originating from Aspergillus spinulosporus and an unknown close relative of Aspergillus quadrilineatus within section Nidulantes , remains poorly understood. The absence of accurate diagnostics for A. latus has led to misidentifications, hindering epidemiological studies and the design of effective treatment plans. We conducted an in-depth investigation of the genomes and phenotypes of 44 globally distributed isolates (41 clinical isolates and three type strains) from Aspergillus section Nidulantes . We found that 21 clinical isolates were A. latus ; notably, standard methods of pathogen identification misidentified all A. latus isolates. The remaining isolates were identified as A. spinulosporus (8), A. quadrilineatus (1), or A. nidulans (11). Phylogenomic analyses shed light on the origin of A. 
latus , indicating one or two hybridization events gave rise to the species during the Miocene, approximately 15.4 to 8.8 million years ago. Characterizing the A. latus pangenome uncovered substantial genetic diversity within gene families and biosynthetic gene clusters. Transcriptomic analysis revealed that both parental genomes are actively expressed in nearly equal proportions and respond to environmental stimuli. Further investigation into infection-relevant chemical and physiological traits, including drug resistance profiles, growth under oxidative stress conditions, and secondary metabolite biosynthesis, highlight distinct phenotypic profiles of the hybrid A. latus compared to its parental and closely related species. Leveraging our comprehensive genomic and phenotypic analyses, we propose five genomic and phenotypic markers as diagnostics for A. latus species identification. These findings provide valuable insights into the evolutionary origin, genomic outcome, and phenotypic implications of hybridization in a cryptic fungal pathogen, thus enhancing our understanding of the underlying processes contributing to fungal pathogenesis. Furthermore, our study underscores the effectiveness of extensive genomic and phenotypic analyses as a promising approach for developing diagnostics applicable to future investigations of cryptic and emerging pathogens.}, } @article {pmid37460717, year = {2023}, author = {Kumari, K and Sharma, PK and Shikha, S and Singh, RP}, title = {Molecular characterization and in-depth genome analysis of Enterobacter sp. 
S-16.}, journal = {Functional & integrative genomics}, volume = {23}, number = {3}, pages = {245}, pmid = {37460717}, issn = {1438-7948}, mesh = {Humans ; *Enterobacter/genetics ; RNA, Ribosomal, 16S ; *Genome, Bacterial ; Genomics ; Carbohydrates ; Phylogeny ; }, abstract = {Enterobacter species are considered to be an opportunistic human pathogen owing to the existence of antibiotic-resistant strains and drug residues; however, the detailed analysis of the antibiotic resistance and virulence features in environmental isolates is poorly characterized. Here, in the study, we characterized the biochemical characteristics, and genome, pan-genome, and comparative genome analyses of an environmental isolate Enterobacter sp. S-16. The strain was identified as Enterobacter spp. by using 16S rRNA gene sequencing. To unravel genomic features, whole genome of Enterobacter sp. S-16 was sequenced using a hybrid assembly approach and genome assembly was performed using the Unicycler tool. The assembled genome contained a single contig of size 5.3 Mbp, GC content 55.43%, and 4500 protein-coding genes. The genome analysis revealed the various gene clusters associated with virulence, antibiotic resistance, type VI secretion system (T6SS), and many stress tolerant genes, which may provide important insight for adapting to changing environment conditions. Moreover, different metabolic pathways were identified that potentially contribute to environmental survival. Various hydrolytic enzymes and motility functions equipped the strain S-16 as an active colonizer. The genome analysis confirms the presence of carbohydrate-active enzymes (CAZymes), and non-enzymatic carbohydrate-binding modules (CBMs) involved in the hydrolysis of complex carbohydrate polymers. Moreover, the pan-genome analysis provides detailed information about the core genes and shared genes with the closest related Enterobacter species. 
The present study is the first report showing the presence of YdhE/NorM in Enterobacter spp. Thus, the elucidation of genome sequencing will increase our understanding of the pathogenic nature of environmental isolate, supporting the One Health Concept.}, } @article {pmid37449094, year = {2023}, author = {Buzzanca, D and Kerkhof, PJ and Alessandria, V and Rantsiou, K and Houf, K}, title = {Arcobacteraceae comparative genome analysis demonstrates genome heterogeneity and reduction in species isolated from animals and associated with human illness.}, journal = {Heliyon}, volume = {9}, number = {7}, pages = {e17652}, pmid = {37449094}, issn = {2405-8440}, abstract = {The Arcobacteraceae family groups Gram-negative bacterial species previously included in the family Campylobacteraceae. These species of which some are considered foodborne pathogens, have been isolated from different environmental niches and hosts. They have been isolated from various types of foods, though predominantly from food of animal origin, as well as from stool of humans with enteritis. Their different abilities to survive in different hosts and environments suggest an evolutionary pressure with consequent variation in their genome content. Moreover, their different physiological and genomic characteristics led to the recent proposal to create new genera within this family, which is however criticized due to the lack of discriminatory features and biological and clinical relevance. Aims of the present study were to assess the Arcobacteraceae pangenome, and to characterize existing similarities and differences in 20 validly described species. For this, analysis has been conducted on the genomes of the corresponding type strains obtained by Illumina sequencing, applying several bioinformatic tools. 
Results of the present study do not support the proposed division into different genera and revealed the presence of pangenome partitions with numbers comparable to other Gram-negative bacteria genera, such as Campylobacter. Different gene class compositions in animal and human-associated species are present, including a higher percentage of virulence-related gene classes such as cell motility genes. The adaptation to environmental and/or host conditions of some species was identified by the presence of specific genes. Furthermore, a division into pathogenic and non-pathogenic species is suggested, which can support future research on food safety and public health.}, } @article {pmid37446042, year = {2023}, author = {Arifuzzaman, M and Jost, M and Wang, M and Chen, X and Perovic, D and Park, RF and Rouse, M and Forrest, K and Hayden, M and Khan, GA and Dracatos, PM}, title = {Mining the Australian Grains Gene Bank for Rust Resistance in Barley.}, journal = {International journal of molecular sciences}, volume = {24}, number = {13}, pages = {}, pmid = {37446042}, issn = {1422-0067}, mesh = {Chromosome Mapping ; *Hordeum/genetics/microbiology ; Disease Resistance/genetics ; Australia ; Phenotype ; *Basidiomycota/genetics ; Plant Diseases/genetics/microbiology ; }, abstract = {Global barley production is threatened by plant pathogens, especially the rusts. In this study we used a targeted genotype-by-sequencing (GBS) assisted GWAS approach to identify rust resistance alleles in a collection of 287 genetically distinct diverse barley landraces and historical cultivars available in the Australian Grains Genebank (AGG) and originally sourced from Eastern Europe. The accessions were challenged with seven US-derived cereal rust pathogen races including Puccinia hordei (Ph-leaf rust) race 17VA12C, P. coronata var. hordei (Pch-crown rust) race 91NE9305 and five pathogenically diverse races of P. striiformis f. sp. 
hordei (Psh-stripe rust) (PSH-33, PSH-48, PSH-54, PSH-72 and PSH-100) and phenotyped quantitatively at the seedling stage. Novel resistance factors were identified on chromosomes 1H, 2H, 4H and 5H in response to Pch, whereas a race-specific QTL on 7HS was identified that was effective only to Psh isolates PSH-72 and PSH-100. A major effect QTL on chromosome 5HL conferred resistance to all Psh races including PSH-72, which is virulent on all 12 stripe rust differential tester lines. The same major effect QTL was also identified in response to leaf rust (17VA12C) suggesting this locus contains several pathogen specific rust resistance genes or the same gene is responsible for both leaf rust and stripe rust resistance. Twelve accessions were highly resistant to both leaf and stripe rust diseases and also carried the 5HL QTL. We subsequently surveyed the physical region at the 5HL locus for across the barley pan genome variation in the presence of known resistance gene candidates and identified a rich source of high confidence protein kinase and antifungal genes in the QTL region.}, } @article {pmid37435610, year = {2023}, author = {Deverka, P and Geary, J and Mathews, C and Cohen, M and Hooker, G and Majumder, M and Skvarkova, Z and Cook-Deegan, R}, title = {Payer reimbursement practices and incentives for improving interpretation of germline genetic testing.}, journal = {Journal of law and the biosciences}, volume = {10}, number = {2}, pages = {lsad020}, doi = {10.1093/jlb/lsad020}, pmid = {37435610}, issn = {2053-9711}, abstract = {Germline genetic testing for inherited cancer risk has shifted to multi-gene panel tests (MGPTs). While MGPTs detect more pathogenic variants, they also detect more variants of uncertain significance (VUSs) that increase the possibility of harms such as unnecessary surgery. Data sharing by laboratories is critical to addressing the VUS problem. 
However, barriers to sharing and an absence of incentives have limited laboratory contributions to the ClinVar database. Payers can play a crucial role in the expansion of knowledge and effectiveness of genetic testing. Current policies affecting MGPT reimbursement are complex and create perverse incentives. Trends in utilization and coverage for private payers and Medicare illustrate opportunities and challenges for data sharing to close knowledge gaps and improve clinical utility. Policy options include making data sharing (i) a condition of payment, and (ii) a metric of laboratory quality in payment contracts, yielding preferred coverage or enhanced reimbursement. Mandating data sharing sufficient to verify interpretations and resolve discordance among labs under Medicare and federal health programs is an option for the US Congress. Such policies can reduce the current waste of valuable data needed for precision oncology and improved patient outcomes, enabling a learning health system.}, } @article {pmid37434713, year = {2023}, author = {Batarseh, TN and Batarseh, SN and Morales-Cruz, A and Gaut, BS}, title = {Comparative genomics of the Liberibacter genus reveals widespread diversity in genomic content and positive selection history.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1206094}, pmid = {37434713}, issn = {1664-302X}, abstract = {'Candidatus Liberibacter' is a group of bacterial species that are obligate intracellular plant pathogens and cause Huanglongbing disease of citrus trees and Zebra Chip in potatoes. Here, we examined the extent of intra- and interspecific genetic diversity across the genus using comparative genomics. Our approach examined a wide set of Liberibacter genome sequences including five pathogenic species and one species not known to cause disease. 
By performing comparative genomics analyses, we sought to understand the evolutionary history of this genus and to identify genes or genome regions that may affect pathogenicity. With a set of 52 genomes, we performed comparative genomics, measured genome rearrangement, and completed statistical tests of positive selection. We explored markers of genetic diversity across the genus, such as average nucleotide identity across the whole genome. These analyses revealed the highest intraspecific diversity amongst the 'Ca. Liberibacter solanacearum' species, which also has the largest plant host range. We identified sets of core and accessory genes across the genus and within each species and measured the ratio of nonsynonymous to synonymous mutations (dN/dS) across genes. We identified ten genes with evidence of a history of positive selection in the Liberibacter genus, including genes in the Tad complex, which have been previously implicated as being highly divergent in the 'Ca. L. capsica' species based on high values of dN.}, } @article {pmid37433982, year = {2023}, author = {Attwaters, M}, title = {A diverse and inclusive human pangenome.}, journal = {Nature reviews. 
Genetics}, volume = {24}, number = {9}, pages = {585}, pmid = {37433982}, issn = {1471-0064}, } @article {pmid37431308, year = {2023}, author = {Amas, JC and Bayer, PE and Hong Tan, W and Tirnaz, S and Thomas, WJW and Edwards, D and Batley, J}, title = {Comparative pangenome analyses provide insights into the evolution of Brassica rapa resistance gene analogues (RGAs).}, journal = {Plant biotechnology journal}, volume = {21}, number = {10}, pages = {2100--2112}, pmid = {37431308}, issn = {1467-7652}, support = {DP200100762//Australian Research Council/ ; DP210100296//Australian Research Council/ ; UWA1905- 006RTX//Grains Research and Development Corporation/ ; }, mesh = {*Brassica rapa/genetics ; Genes, Plant/genetics ; Disease Resistance/genetics ; Leucine ; Plant Breeding ; *Brassica napus/genetics ; }, abstract = {Brassica rapa is grown worldwide as economically important vegetable and oilseed crop. However, its production is challenged by yield-limiting pathogens. The sustainable control of these pathogens mainly relies on the deployment of genetic resistance primarily driven by resistance gene analogues (RGAs). While several studies have identified RGAs in B. rapa, these were mainly based on a single genome reference and do not represent the full range of RGA diversity in B. rapa. In this study, we utilized the B. rapa pangenome, constructed from 71 lines encompassing 12 morphotypes, to describe a comprehensive repertoire of RGAs in B. rapa. We show that 309 RGAs were affected by presence-absence variation (PAV) and 223 RGAs were missing from the reference genome. The transmembrane leucine-rich repeat (TM-LRR) RGA class had more core gene types than variable genes, while the opposite was observed for nucleotide-binding site leucine-rich repeats (NLRs). Comparative analysis with the B. napus pangenome revealed significant RGA conservation (93%) between the two species. We identified 138 candidate RGAs located within known B. 
rapa disease resistance QTL, of which the majority were under negative selection. Using blackleg gene homologues, we demonstrated how these genes in B. napus were derived from B. rapa. This further clarifies the genetic relationship of these loci, which may be useful in narrowing-down candidate blackleg resistance genes. This study provides a novel genomic resource towards the identification of candidate genes for breeding disease resistance in B. rapa and its relatives.}, } @article {pmid37430957, year = {2022}, author = {Rani, A and Dike, CC and Mantri, N and Ball, A}, title = {Point-of-Care Lateral Flow Detection of Viable Escherichia coli O157:H7 Using an Improved Propidium Monoazide-Recombinase Polymerase Amplification Method.}, journal = {Foods (Basel, Switzerland)}, volume = {11}, number = {20}, pages = {}, pmid = {37430957}, issn = {2304-8158}, abstract = {The detection of both viable and viable but non-culturable (VBNC) Escherichia coli O157:H7 is a crucial part of food safety. Traditional culture-dependent methods are lengthy, expensive, laborious, and unable to detect VBNC. Hence, there is a need to develop a rapid, simple, and cost-effective detection method to differentiate between viable/dead E. coli O157:H7 and detect VBNC cells. In this work, recombinase polymerase amplification (RPA) was developed for the detection of viable E. coli O157:H7 through integration with propidium monoazide (PMAxx). Initially, two primer sets, targeting two different genes (rfbE and stx) were selected, and DNA amplification by RPA combined with PMAxx treatment and the lateral flow assay (LFA) was carried out. Subsequently, the rfbE gene target was found to be more effective in inhibiting the amplification from dead cells and detecting only viable E. coli O157:H7. The assay's detection limit was found to be 10[2] CFU/mL for VBNC E. coli O157:H7 when applied to spiked commercial beverages including milk, apple juice, and drinking water. 
pH values from 3 to 11 showed no significant effect on the efficacy of the assay. The PMAxx-RPA-LFA was completed at 39 °C within 40 min. This study introduces a rapid, robust, reliable, and reproducible method for detecting viable bacterial counts. In conclusion, the optimised assay has the potential to be used by the food and beverage industry in quality assurance related to E. coli O157:H7.}, } @article {pmid37429841, year = {2023}, author = {Tisza, MJ and Smith, DDN and Clark, AE and Youn, JH and Khil, PP and Dekker, JP}, title = {Roving methyltransferases generate a mosaic epigenetic landscape and influence evolution in Bacteroides fragilis group.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {4082}, pmid = {37429841}, issn = {2041-1723}, mesh = {Humans ; *Methyltransferases/genetics ; Bacteroides fragilis/genetics ; Epigenomics ; DNA Methylation/genetics ; *Bacteriophages/genetics ; Bacteroides ; Epigenesis, Genetic ; }, abstract = {Three types of DNA methyl modifications have been detected in bacterial genomes, and mechanistic studies have demonstrated roles for DNA methylation in physiological functions ranging from phage defense to transcriptional control of virulence and host-pathogen interactions. Despite the ubiquity of methyltransferases and the immense variety of possible methylation patterns, epigenomic diversity remains unexplored for most bacterial species. Members of the Bacteroides fragilis group (BFG) reside in the human gastrointestinal tract as key players in symbiotic communities but also can establish anaerobic infections that are increasingly multi-drug resistant. In this work, we utilize long-read sequencing technologies to perform pangenomic (n = 383) and panepigenomic (n = 268) analysis of clinical BFG isolates cultured from infections seen at the NIH Clinical Center over four decades. 
Our analysis reveals that single BFG species harbor hundreds of DNA methylation motifs, with most individual motif combinations occurring uniquely in single isolates, implying immense unsampled methylation diversity within BFG epigenomes. Mining of BFG genomes identified more than 6000 methyltransferase genes, approximately 1000 of which were associated with intact prophages. Network analysis revealed substantial gene flow among disparate phage genomes, implying a role for genetic exchange between BFG phages as one of the ultimate sources driving BFG epigenome diversity.}, } @article {pmid37424551, year = {2023}, author = {Narayanan, S and Couger, B and Bates, H and Gupta, SK and Malayer, J and Ramachandran, A}, title = {Characterization of three Francisella tularensis genomes from Oklahoma, USA.}, journal = {Access microbiology}, volume = {5}, number = {6}, pages = {acmi000451}, pmid = {37424551}, issn = {2516-8290}, abstract = {Francisella tularensis , the causative agent for tularaemia, is a Tier 1 select agent, and a pan-species pathogen of global significance due to its zoonotic potential. Consistent genome characterization of the pathogen is essential to identify novel genes, virulence factors, antimicrobial resistance genes, for studying phylogenetics and other features of interest. This study was conducted to understand the genetic variations among genomes of F. tularensis isolated from two felines and one human source. Pan-genome analysis revealed that 97.7 % of genes were part of the core genome. All three F. tularensis isolates were assigned to sequence type A based on single nucleotide polymorphisms (SNPs) in sdhA. Most of the virulence genes were part of the core genome. An antibiotic resistance gene coding for class A beta-lactamase was detected in all three isolates. Phylogenetic analysis showed that these isolates clustered with other isolates reported from Central and South-Central USA. Assessment of large sets of the F. 
tularensis genome sequences is essential in understanding pathogen dynamics, geographical distribution and potential zoonotic implications.}, } @article {pmid37423939, year = {2023}, author = {Priyamvada, P and Ramaiah, S}, title = {Pan-genome and reverse vaccinology approaches to design multi-epitope vaccine against Epstein-Barr virus associated with colorectal cancer.}, journal = {Immunologic research}, volume = {}, number = {}, pages = {}, pmid = {37423939}, issn = {1559-0755}, abstract = {Epstein-Barr virus (EBV) is a global lymphotropic virus and has been associated with various malignancies, among which colorectal cancer (CRC) is the prevalent one causing mortality worldwide. In the recent past, numerous research efforts have been made to develop a potential vaccine against this virus; however, none is effective possibly due to their low throughput, laboriousness, and lack of sensitivity. In this study, we designed a multi-epitope subunit vaccine that targets latent membrane protein (LMP-2B) of EBV using pan-genome and reverse vaccinology approaches. Twenty-three major histocompatibility complex (MHC) epitopes (five class-I and eighteen class-II) and eight B-cell epitopes, which have been found to be antigenic, immunogenic, and non-toxic, were selected for the vaccine construction. Furthermore, 24 vaccine constructs (VCs) were designed from the predicted epitopes and out of which VC1 was selected and finalized based on its structural parameters. The functionality of VC1 was validated through molecular docking with different immune receptors (MHC class-I, MHC class-II, and TLRs). The binding affinity, molecular and immune simulation revealed that the VC1 had more stable interaction and is believed to elicit good immune responses against EBV. HIGHLIGHTS: Pan-genome and reverse vaccinology approaches were used to design a multi-epitope subunit vaccine against LMP-2B protein of EBV. 
Epitopes were selected based on the antigenic, immunogenic, and non-toxic properties. Twenty-four vaccine constructs (VCs) were designed from the predicted epitopes. Designed vaccine VC1 has shown good binding affinity and molecular and immune simulation. VC1 was validated using molecular docking with different immune receptors.}, } @article {pmid37409285, year = {2023}, author = {Luo, M and Sarnowski, TJ and Libault, M and Ríos, G and Charron, JB and Mantri, N and Zhang, S}, title = {Editorial: New insights into mechanisms of epigenetic modifiers in plant growth and development, volume II.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1213511}, pmid = {37409285}, issn = {1664-462X}, } @article {pmid37408640, year = {2023}, author = {Kumari, K and Rawat, V and Shadan, A and Sharma, PK and Deb, S and Singh, RP}, title = {In-depth genome and pan-genome analysis of a metal-resistant bacterium Pseudomonas parafulva OS-1.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1140249}, pmid = {37408640}, issn = {1664-302X}, abstract = {A metal-resistant bacterium Pseudomonas parafulva OS-1 was isolated from waste-contaminated soil in Ranchi City, India. The isolated strain OS-1 showed its growth at 25-45°C, pH 5.0-9.0, and in the presence of ZnSO4 (upto 5 mM). Phylogenetic analysis based on 16S rRNA gene sequences revealed that strain OS-1 belonged to the genus Pseudomonas and was most closely related to parafulva species. To unravel the genomic features, we sequenced the complete genome of P. parafulva OS-1 using Illumina HiSeq 4,000 sequencing platform. The results of average nucleotide identity (ANI) analysis indicated the closest similarity of OS-1 to P. parafulva PRS09-11288 and P. parafulva DTSP2. The metabolic potential of P. 
parafulva OS-1 based on Clusters of Othologous Genes (COG) and Kyoto Encyclopedia of Genes and Genomes (KEGG) indicated a high number of genes related to stress protection, metal resistance, and multiple drug-efflux, etc., which is relatively rare in P. parafulva strains. Compared with other parafulva strains, P. parafulva OS-1 was found to have the unique β-lactam resistance and type VI secretion system (T6SS) gene. Additionally, its genomes encode various CAZymes such as glycoside hydrolases and other genes associated with lignocellulose breakdown, suggesting that strain OS-1 have strong biomass degradation potential. The presence of genomic complexity in the OS-1 genome indicates that horizontal gene transfer (HGT) might happen during evolution. Therefore, genomic and comparative genome analysis of parafulva strains is valuable for further understanding the mechanism of resistance to metal stress and opens a perspective to exploit a newly isolated bacterium for biotechnological applications.}, } @article {pmid37401440, year = {2023}, author = {Glick, L and Mayrose, I}, title = {The Effect of Methodological Considerations on the Construction of Gene-Based Plant Pan-genomes.}, journal = {Genome biology and evolution}, volume = {15}, number = {7}, pages = {}, pmid = {37401440}, issn = {1759-6653}, support = {//Edmond J. Safra Center for Bioinformatics at Tel-Aviv University/ ; US-5089-18//BARD US-Israel Agricultural Research and Development Fund/ ; }, mesh = {*Genomics/methods ; Genome, Plant ; Sequence Analysis, DNA ; Molecular Sequence Annotation ; Plants/genetics ; *Arabidopsis/genetics ; }, abstract = {Pan-genomics is an emerging approach for studying the genetic diversity within plant populations. 
In contrast to common resequencing studies that compare whole genome sequencing data with a single reference genome, the construction of a pan-genome (PG) involves the direct comparison of multiple genomes to one another, thereby enabling the detection of genomic sequences and genes not present in the reference, as well as the analysis of gene content diversity. Although multiple studies describing PGs of various plant species have been published in recent years, a better understanding regarding the effect of the computational procedures used for PG construction could guide researchers in making more informed methodological decisions. Here, we examine the effect of several key methodological factors on the obtained gene pool and on gene presence-absence detections by constructing and comparing multiple PGs of Arabidopsis thaliana and cultivated soybean, as well as conducting a meta-analysis on published PGs. These factors include the construction method, the sequencing depth, and the extent of input data used for gene annotation. We observe substantial differences between PGs constructed using three common procedures (de novo assembly and annotation, map-to-pan, and iterative assembly) and that results are dependent on the extent of the input data. Specifically, we report low agreement between the gene content inferred using different procedures and input data. 
Our results should increase the awareness of the community to the consequences of methodological decisions made during the process of PG construction and emphasize the need for further investigation of commonly applied methodologies.}, } @article {pmid37397999, year = {2023}, author = {Raghuram, V and Gunoskey, JJ and Hofstetter, KS and Jacko, NF and Shumaker, MJ and Hu, YJ and Read, TD and David, MZ}, title = {Comparison of genomic diversity between single and pooled Staphylococcus aureus colonies isolated from human colonisation cultures.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37397999}, support = {R01 AI139188/AI/NIAID NIH HHS/United States ; R01 AI158452/AI/NIAID NIH HHS/United States ; }, abstract = {The most common approach to sampling the bacterial populations within an infected or colonised host is to sequence genomes from a single colony obtained from a culture plate. However, it is recognized that this method does not capture the genetic diversity in the population. An alternative is to sequence a mixture containing multiple colonies ("pool-seq"), but this has the disadvantage that it is a non-homogeneous sample, making it difficult to perform specific experiments. We compared differences in measures of genetic diversity between eight single-colony isolates (singles) and pool-seq on a set of 2286 S. aureus culture samples. The samples were obtained by swabbing three body sites on 85 human participants quarterly for a year, who initially presented with a methicillin-resistant S. aureus skin and soft-tissue infection (SSTI). We compared parameters such as sequence quality, contamination, allele frequency, nucleotide diversity and pangenome diversity in each pool to the corresponding singles. Comparing singles from the same culture plate, we found that 18% of sample collections contained mixtures of multiple Multilocus sequence types (MLSTs or STs). 
We showed that pool-seq data alone could predict the presence of multi-ST populations with 95% accuracy. We also showed that pool-seq could be used to estimate the number of polymorphic sites in the population. Additionally, we found that the pool may contain clinically relevant genes such as antimicrobial resistance markers that may be missed when only examining singles. These results highlight the potential advantage of analysing genome sequences of total populations obtained from clinical cultures rather than single colonies.}, } @article {pmid37396358, year = {2023}, author = {Simpson, AC and Eedara, VVR and Singh, NK and Damle, N and Parker, CW and Karouia, F and Mason, CE and Venkateswaran, K}, title = {Comparative genomic analysis of Cohnella hashimotonis sp. nov. isolated from the International Space Station.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1166013}, pmid = {37396358}, issn = {1664-302X}, abstract = {A single strain from the family Paenibacillaceae was isolated from the wall behind the Waste Hygiene Compartment aboard the International Space Station (ISS) in April 2018, as part of the Microbial Tracking mission series. This strain was identified as a gram-positive, rod-shaped, oxidase-positive, catalase-negative motile bacterium in the genus Cohnella, designated as F6_2S_P_1[T]. The 16S sequence of the F6_2S_P_1[T] strain places it in a clade with C. rhizosphaerae and C. ginsengisoli, which were originally isolated from plant tissue or rhizosphere environments. The closest 16S and gyrB matches to strain F6_2S_P_1[T] are to C. rhizosphaerae with 98.84 and 93.99% sequence similarity, while a core single-copy gene phylogeny from all publicly available Cohnella genomes places it as more closely related to C. ginsengisoli. Average nucleotide identity (ANI) and digital DNA-DNA hybridization (dDDH) values to any described Cohnella species are <89 and <22%, respectively. 
The major fatty acids for strain F6_2S_P_1[T] are anteiso-C15:0 (51.7%), iso-C16:0 (23.1%), and iso-C15:0 (10.5%), and it is able to metabolize a wide range of carbon compounds. Given the results of the ANI and dDDH analyses, this ISS strain is a novel species within the genus Cohnella for which we propose the name Cohnella hashimotonis, with the type strain F6_2S_P_1[T] (=NRRL B-65657[T] and DSMZ 115098[T]). Because no closely related Cohnella genomes were available, this study generated the whole-genome sequences (WGSs) of the type strains for C. rhizosphaerae and C. ginsengisoli. Phylogenetic and pangenomic analysis reveals that F6_2S_P_1[T], C. rhizosphaerae, and C. ginsengisoli, along with two uncharacterized Cohnella strains, possess a shared set of 332 gene clusters which are not shared with any other WGS of Cohnella species, and form a distinct clade branching off from C. nanjingensis. Functional traits were predicted for the genomes of strain F6_2S_P_1[T] and other members of this clade.}, } @article {pmid37395662, year = {2023}, author = {Moreno, E and Middlebrook, EA and Altamirano-Silva, P and Al Dahouk, S and Araj, GF and Arce-Gorvel, V and Arenas-Gamboa, Á and Ariza, J and Barquero-Calvo, E and Battelli, G and Bertu, WJ and Blasco, JM and Bosilkovski, M and Cadmus, S and Caswell, CC and Celli, J and Chacón-Díaz, C and Chaves-Olarte, E and Comerci, DJ and Conde-Álvarez, R and Cook, E and Cravero, S and Dadar, M and De Boelle, X and De Massis, F and Díaz, R and Escobar, GI and Fernández-Lago, L and Ficht, TA and Foster, JT and Garin-Bastuji, B and Godfroid, J and Gorvel, JP and Güler, L and Erdenliğ-Gürbilek, S and Gusi, AM and Guzmán-Verri, C and Hai, J and Hernández-Mora, G and Iriarte, M and Jacob, NR and Keriel, A and Khames, M and Köhler, S and Letesson, JJ and Loperena-Barber, M and López-Goñi, I and McGiven, J and Melzer, F and Mora-Cartin, R and Moran-Gilad, J and Muñoz, PM and Neubauer, H and O'Callaghan, D and Ocholi, R and Oñate, Á and 
Pandey, P and Pappas, G and Pembroke, JT and Roop, M and Ruiz-Villalonos, N and Ryan, MP and Salcedo, SP and Salvador-Bescós, M and Sangari, FJ and de Lima Santos, R and Seimenis, A and Splitter, G and Suárez-Esquivel, M and Tabbaa, D and Trangoni, MD and Tsolis, RM and Vizcaíno, N and Wareth, G and Welburn, SC and Whatmore, A and Zúñiga-Ripa, A and Moriyón, I}, title = {If You're Not Confused, You're Not Paying Attention: Ochrobactrum Is Not Brucella.}, journal = {Journal of clinical microbiology}, volume = {61}, number = {8}, pages = {e0043823}, pmid = {37395662}, issn = {1098-660X}, support = {001/WHO_/World Health Organization/International ; R01 AI158372/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Humans ; *Brucella/genetics ; *Ochrobactrum/genetics ; Phylogeny ; *Brucellosis/diagnosis/microbiology ; Attention ; Confusion ; }, abstract = {Bacteria of the genus Brucella are facultative intracellular parasites that cause brucellosis, a severe animal and human disease. Recently, a group of taxonomists merged the brucellae with the primarily free-living, phylogenetically related Ochrobactrum spp. in the genus Brucella. This change, founded only on global genomic analysis and the fortuitous isolation of some opportunistic Ochrobactrum spp. from medically compromised patients, has been automatically included in culture collections and databases. 
We argue that clinical and environmental microbiologists should not accept this nomenclature, and we advise against its use because (i) it was presented without in-depth phylogenetic analyses and did not consider alternative taxonomic solutions; (ii) it was launched without the input of experts in brucellosis or Ochrobactrum; (iii) it applies a non-consensus genus concept that disregards taxonomically relevant differences in structure, physiology, population structure, core-pangenome assemblies, genome structure, genomic traits, clinical features, treatment, prevention, diagnosis, genus description rules, and, above all, pathogenicity; and (iv) placing these two bacterial groups in the same genus creates risks for veterinarians, medical doctors, clinical laboratories, health authorities, and legislators who deal with brucellosis, a disease that is particularly relevant in low- and middle-income countries. Based on all this information, we urge microbiologists, bacterial collections, genomic databases, journals, and public health boards to keep the Brucella and Ochrobactrum genera separate to avoid further bewilderment and harm.}, } @article {pmid37395647, year = {2023}, author = {Queiroz, VF and Carvalho, JVRP and de Souza, FG and Lima, MT and Santos, JD and Rocha, KLS and de Oliveira, DB and Araújo, JP and Ullmann, LS and Rodrigues, RAL and Abrahão, JS}, title = {Analysis of the Genomic Features and Evolutionary History of Pithovirus-Like Isolates Reveals Two Major Divergent Groups of Viruses.}, journal = {Journal of virology}, volume = {97}, number = {7}, pages = {e0041123}, pmid = {37395647}, issn = {1098-5514}, mesh = {Humans ; *Genome, Viral/genetics ; Genomics ; *Giant Viruses/classification/genetics ; *Phylogeny ; Genetic Variation ; Evolution, Molecular ; }, abstract = {New representatives of the phylum Nucleocytoviricota have been rapidly described in the last decade. 
Despite this, not all viruses of this phylum are allocated to recognized taxonomic families, as is the case for orpheovirus, pithovirus, and cedratvirus, which form the proposed family Pithoviridae. In this study, we performed comprehensive comparative genomic analyses of 8 pithovirus-like isolates, aiming to understand their common traits and evolutionary history. Structural and functional genome annotation was performed de novo for all the viruses, which served as a reference for pangenome construction. The synteny analysis showed substantial differences in genome organization between these viruses, with very few and short syntenic blocks shared between orpheovirus and its relatives. It was possible to observe an open pangenome with a significant increase in the slope when orpheovirus was added, alongside a decrease in the core genome. Network analysis placed orpheovirus as a distant and major hub with a large fraction of unique clusters of orthologs, indicating a distant relationship between this virus and its relatives, with only a few shared genes. Additionally, phylogenetic analyses of strict core genes shared with other viruses of the phylum reinforced the divergence of orpheovirus from pithoviruses and cedratviruses. Altogether, our results indicate that although pithovirus-like isolates share common features, this group of ovoid-shaped giant viruses presents substantial differences in gene contents, genomic architectures, and the phylogenetic history of several core genes. Our data indicate that orpheovirus is an evolutionarily divergent viral entity, suggesting its allocation to a different viral family, Orpheoviridae. IMPORTANCE Giant viruses that infect amoebae form a monophyletic group named the phylum Nucleocytoviricota. Despite being genomically and morphologically very diverse, the taxonomic categories of some clades that form this phylum are not yet well established. 
With advances in isolation techniques, the speed at which new giant viruses are described has increased, escalating the need to establish criteria to define the emerging viral taxa. In this work, we performed a comparative genomic analysis of representatives of the putative family Pithoviridae. Based on the dissimilarity of orpheovirus from the other viruses of this putative family, we propose that orpheovirus be considered a member of an independent family, Orpheoviridae, and suggest criteria to demarcate families consisting of ovoid-shaped giant viruses.}, } @article {pmid37395521, year = {2023}, author = {Meng, PQ and Zhang, Q and Ding, Y and Lin, JX and Chen, F}, title = {Evolutionary and Pan-genome Analysis of Three Important Black-pigmented Periodontal Pathogens.}, journal = {The Chinese journal of dental research}, volume = {26}, number = {2}, pages = {93--104}, doi = {10.3290/j.cjdr.b4128023}, pmid = {37395521}, issn = {1867-5646}, mesh = {*Prevotella/genetics/metabolism ; Phylogeny ; Prevotella intermedia/genetics/metabolism ; *Porphyromonas gingivalis/genetics/metabolism ; Prevotella nigrescens/genetics ; }, abstract = {OBJECTIVE: To analyse the pan-genome of three black-pigmented periodontal pathogens: Porphyromonas gingivalis, Prevotella intermedia and Prevotella nigrescens.

METHODS: Pan-genome analyses of 66, 33 and 5 publicly available whole-genome sequences of P. gingivalis, P. intermedia and P. nigrescens, respectively, were performed using Pan-genome Analysis Pipeline software (version 1.2.1; Beijing Institute of Genomics, Chinese Academy of Sciences, Beijing, PR China). Phylogenetic trees were constructed based on the entire pan-genome and single nucleotide polymorphisms within the core genome. The distribution and abundance of virulence genes in the core and dispensable genomes were also compared in the three species.

RESULTS: All three species possess an open pan-genome. The core genome of P. gingivalis, P. intermedia and P. nigrescens included 1001, 1514 and 1745 orthologous groups, respectively, which were mainly related to basic cellular functions such as metabolism. The dispensable genome of P. gingivalis, P. intermedia and P. nigrescens was composed of 2814, 2689 and 906 orthologous groups, respectively, and it was enriched in genes involved in pathogenicity or with unknown functions. Phylogenetic trees presented a clear separation of P. gingivalis, P. intermedia and P. nigrescens, verifying the reclassification of the black-pigmented species. Furthermore, the three species shared almost the same virulence factors involved in adhesion, proteolysis and evasion of host defences. Some of these virulence genes were conserved across species whereas others belonged to the dispensable genome, which might be acquired through horizontal gene transfer.

CONCLUSION: This study highlighted the usefulness of pan-genome analysis to infer evolutionary cues for black-pigmented species, indicating their homology and phylogenomic diversity.}, } @article {pmid37393724, year = {2023}, author = {He, Y and Pan, J and Huang, D and Sanford, RA and Peng, S and Wei, N and Sun, W and Shi, L and Jiang, Z and Jiang, Y and Hu, Y and Li, S and Li, Y and Li, M and Dong, Y}, title = {Distinct microbial structure and metabolic potential shaped by significant environmental gradient impacted by ferrous slag weathering.}, journal = {Environment international}, volume = {178}, number = {}, pages = {108067}, doi = {10.1016/j.envint.2023.108067}, pmid = {37393724}, issn = {1873-6750}, mesh = {Humans ; *Bacteria/genetics ; Metagenome ; *Microbiota ; Weather ; Carbon/metabolism ; }, abstract = {Alkaline ferrous slags pose global environmental issues and long-term risks to ambient environments. To explore the under-investigated microbial structure and biogeochemistry in such unique ecosystems, combined geochemical, microbial, ecological and metagenomic analyses were performed in the areas adjacent to a ferrous slag disposal plant in Sichuan, China. Different levels of exposure to ultrabasic slag leachate had resulted in a significant geochemical gradient of pH (8.0-12.4), electric potential (-126.9 to 437.9 mV), total organic carbon (TOC, 1.5-17.3 mg/L), and total nitrogen (TN, 0.17-1.01 mg/L). Distinct microbial communities were observed depending on their exposure to the strongly alkaline leachate. High pH and Ca[2+] concentrations were associated with low microbial diversity and enrichment of bacterial classes Gamma-proteobacteria and Deinococci in the microbial communities exposed to the leachate. Combined metagenomic analyses of 4 leachate-unimpacted and 2-impacted microbial communities led to the assembly of one Serpentinomonas pangenome and 81 phylogenetically diversified metagenome assembled genomes (MAGs). 
The prevailing taxa in the leachate-impacted habitats (e.g., Serpentinomonas and Meiothermus spp.) were phylogenetically related to those in active serpentinizing ecosystems, suggesting the analogous processes between the man-made and natural systems. More importantly, they accounted for significant abundance of most functional genes associated with environmental adaptation and major element cycling. Their metabolic potential (e.g., cation/H[+] antiporters, carbon fixation on lithospheric carbon source, and respiration coupling sulfur oxidization and oxygen or nitrate reduction) may support these taxa to survive and prosper in these unique geochemical niches. This study provides fundamental understandings of the adaptive strategies of microorganisms in response to the strong environmental perturbation by alkali tailings. It also contributes to a better comprehension of how to remediate environments affected by alkaline industrial material.}, } @article {pmid37389215, year = {2023}, author = {Conte, AL and Brunetti, F and Marazzato, M and Longhi, C and Maurizi, L and Raponi, G and Palamara, AT and Grassi, S and Conte, MP}, title = {Atopic dermatitis-derived Staphylococcus aureus strains: what makes them special in the interplay with the host.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1194254}, pmid = {37389215}, issn = {2235-2988}, mesh = {Humans ; *Dermatitis, Atopic ; Staphylococcus aureus/genetics ; Multilocus Sequence Typing ; Genotype ; Skin ; }, abstract = {BACKGROUND: Atopic dermatitis (AD) is a chronic inflammatory skin condition whose pathogenesis involves genetic predisposition, epidermal barrier dysfunction, alterations in the immune responses and microbial dysbiosis. Clinical studies have shown a link between Staphylococcus aureus and the pathogenesis of AD, although the origins and genetic diversity of S. aureus colonizing patients with AD is poorly understood. 
The aim of the study was to investigate if specific clones might be associated with the disease.

METHODS: WGS analyses were performed on 38 S. aureus strains, deriving from AD patients and healthy carriers. Genotypes (i.e. MLST, spa-, agr- and SCCmec-typing), genomic content (e.g. virulome and resistome), and the pan-genome structure of strains have been investigated. Phenotypic analyses were performed to determine the antibiotic susceptibility, the biofilm production and the invasiveness within the investigated S. aureus population.

RESULTS: Strains isolated from AD patients revealed a high degree of genetic heterogeneity and a shared set of virulence factors and antimicrobial resistance genes, suggesting that no genotype and genomic content are uniquely associated with AD. The same strains were characterized by a lower variability in terms of gene content, indicating that the inflammatory conditions could exert a selective pressure leading to the optimization of the gene repertoire. Furthermore, genes related to specific mechanisms, like post-translational modification, protein turnover and chaperones as well as intracellular trafficking, secretion and vesicular transport, were significantly more enriched in AD strains. Phenotypic analysis revealed that all of our AD strains were strong or moderate biofilm producers, while less than half showed invasive capabilities.

CONCLUSIONS: We conclude that in AD skin, the functional role played by S. aureus may depend on differential gene expression patterns and/or on post-translational modification mechanisms rather than being associated with peculiar genetic features.}, } @article {pmid37386186, year = {2023}, author = {Ahsan, MU and Liu, Q and Perdomo, JE and Fang, L and Wang, K}, title = {A survey of algorithms for the detection of genomic structural variants from long-read sequencing data.}, journal = {Nature methods}, volume = {20}, number = {8}, pages = {1143--1158}, pmid = {37386186}, issn = {1548-7105}, support = {GM132713/GM/NIGMS NIH HHS/United States ; }, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Algorithms ; *Genome ; Genomic Structural Variation ; High-Throughput Nucleotide Sequencing/methods ; Genome, Human ; }, abstract = {As long-read sequencing technologies are becoming increasingly popular, a number of methods have been developed for the discovery and analysis of structural variants (SVs) from long reads. Long reads enable detection of SVs that could not be previously detected from short-read sequencing, but computational methods must adapt to the unique challenges and opportunities presented by long-read sequencing. 
Here, we summarize over 50 long-read-based methods for SV detection, genotyping and visualization, and discuss how new telomere-to-telomere genome assemblies and pangenome efforts can improve the accuracy and drive the development of SV callers in the future.}, } @article {pmid37382545, year = {2023}, author = {Chen, X and Zhang, H and Feng, J and Zhang, L and Zheng, M and Luo, H and Zhuo, H and Xu, N and Zhang, X and Chen, C and Qu, P and Li, Y}, title = {Comparative Genomic Analysis Reveals Genetic Diversity and Pathogenic Potential of Haemophilus seminalis and Emended Description of Haemophilus seminalis.}, journal = {Microbiology spectrum}, volume = {11}, number = {4}, pages = {e0477222}, pmid = {37382545}, issn = {2165-0497}, mesh = {Humans ; RNA, Ribosomal, 16S/genetics ; *Hemin ; *Haemophilus/genetics ; Haemophilus influenzae ; Genomics ; Phylogeny ; Genetic Variation ; }, abstract = {Haemophilus seminalis is a newly proposed species that is phylogenetically related to Haemophilus haemolyticus. The distribution of H. seminalis in the human population, its genomic diversity, and its pathogenic potential are still unclear. This study reports the finding of our comparative genomic analyses of four newly isolated Haemophilus strains (SZY H8, SZY H35, SZY H36, and SZY H68) from human sputum specimens (Guangzhou, China) along with the publicly available genomes of other phylogenetically related Haemophilus species. Based on pairwise comparisons of the 16S rRNA gene sequences, the four isolates showed <98.65% sequence identity to the type strains of all known Haemophilus species but were identified as belonging to H. seminalis, based on comparable phenotypic and genotypic features. Additionally, the four isolates showed high genome-genome relatedness indices (>95% ANI values) with 17 strains that were previously identified as either "Haemophilus intermedius" or hemin (X-factor)-independent H. haemolyticus and therefore required a more detailed classification study. 
Phylogenetically, these isolates, along with the two previously described H. seminalis isolates (a total of 23 isolates), shared a highly homologous lineage that is distinct from the clades of the main H. haemolyticus and Haemophilus influenzae strains. These isolates present an open pangenome with multiple virulence genes. Notably, all 23 isolates have a functional heme biosynthesis pathway that is similar to that of Haemophilus parainfluenzae. The phenotype of hemin (X-factor) independence and the analysis of the ispD, pepG, and moeA genes can be used to distinguish these isolates from H. haemolyticus and H. influenzae. Based on the above findings, we propose a reclassification for all "H. intermedius" and two H. haemolyticus isolates belonging to H. seminalis with an emended description of H. seminalis. This study provides a more accurate identification of Haemophilus isolates for use in the clinical laboratory and a better understanding of the clinical significance and genetic diversity in human environments. IMPORTANCE As a versatile opportunistic pathogen, the accurate identification of Haemophilus species is a challenge in clinical practice. In this study, we characterized the phenotypic and genotypic features of four H. seminalis strains that were isolated from human sputum specimens and propose the "H. intermedius" and hemin (X-factor)-independent H. haemolyticus isolates as belonging to H. seminalis. The prediction of virulence-related genes indicates that H. seminalis isolates carry several virulence genes that are likely to play an important role in its pathogenicity. In addition, we depict that the genes ispD, pepG, and moeA can be used as biomarkers for distinguishing H. seminalis from H. haemolyticus and H. influenzae. Our findings provide some insights into the identification, epidemiology, genetic diversity, pathogenic potential, and antimicrobial resistance of the newly proposed H. 
seminalis.}, } @article {pmid37382302, year = {2023}, author = {Puente-Sánchez, F and Hoetzinger, M and Buck, M and Bertilsson, S}, title = {Exploring environmental intra-species diversity through non-redundant pangenome assemblies.}, journal = {Molecular ecology resources}, volume = {23}, number = {7}, pages = {1724--1736}, doi = {10.1111/1755-0998.13826}, pmid = {37382302}, issn = {1755-0998}, support = {892961//H2020 Marie Skłodowska-Curie Actions/ ; 2019-02336//Svenska Forskningsrådet Formas/ ; 2017-04422//Vetenskapsrådet/ ; 2018-05973//Vetenskapsrådet/ ; }, mesh = {Phylogeny ; *Bacteria/genetics ; Metagenome ; Algorithms ; *Microbiota ; Metagenomics/methods ; }, abstract = {At the genome level, microorganisms are highly adaptable both in terms of allele and gene composition. Such heritable traits emerge in response to different environmental niches and can have a profound influence on microbial community dynamics. As a consequence, any individual genome or population will contain merely a fraction of the total genetic diversity of any operationally defined "species", whose ecological potential can thus be only fully understood by studying all of their genomes and the genes therein. This concept, known as the pangenome, is valuable for studying microbial ecology and evolution, as it partitions genomes into core (present in all the genomes from a species, and responsible for housekeeping and species-level niche adaptation among others) and accessory regions (present only in some, and responsible for intra-species differentiation). Here we present SuperPang, an algorithm producing pangenome assemblies from a set of input genomes of varying quality, including metagenome-assembled genomes (MAGs). SuperPang runs in linear time and its results are complete, non-redundant, preserve gene ordering and contain both coding and non-coding regions. 
Our approach provides a modular view of the pangenome, identifying operons and genomic islands, and allowing to track their prevalence in different populations. We illustrate this by analysing intra-species diversity in Polynucleobacter, a bacterial genus ubiquitous in freshwater ecosystems, characterized by their streamlined genomes and their ecological versatility. We show how SuperPang facilitates the simultaneous analysis of allelic and gene content variation under different environmental pressures, allowing us to study the drivers of microbial diversification at unprecedented resolution.}, } @article {pmid37379037, year = {2023}, author = {Madhusoodanan, J}, title = {A More Diverse and Complete Reference Human Genome Is Poised to Change Medicine.}, journal = {JAMA}, volume = {330}, number = {3}, pages = {205--206}, doi = {10.1001/jama.2023.9498}, pmid = {37379037}, issn = {1538-3598}, mesh = {Humans ; *Genome, Human/genetics ; *Genomics/standards ; *Medicine/trends ; }, } @article {pmid37377491, year = {2023}, author = {Karanth, S and Patel, J and Shirmohammadi, A and Pradhan, AK}, title = {Machine learning to predict foodborne salmonellosis outbreaks based on genome characteristics and meteorological trends.}, journal = {Current research in food science}, volume = {6}, number = {}, pages = {100525}, pmid = {37377491}, issn = {2665-9271}, abstract = {Several studies have shown a correlation between outbreaks of Salmonella enterica and meteorological trends, especially related to temperature and precipitation. Additionally, current studies based on outbreaks are performed on data for the species Salmonella enterica, without considering its intra-species and genetic heterogeneity. In this study, we analyzed the effect of differential gene expression and a suite of meteorological factors on salmonellosis outbreak scale (typified by case numbers) using a combination of machine learning and count-based modeling methods. 
Elastic Net regularization model was used to identify significant genes from a Salmonella pan-genome, and a multi-variable Poisson regression developed to fit the individual and mixed effects data. The best-fit Elastic Net model (α = 0.50; λ = 2.18) identified 53 significant gene features. The final multi-variable Poisson regression model (χ[2] = 5748.22; pseudo R[2] = 0.669; probability > χ[2] = 0) identified 127 significant predictor terms (p < 0.10), comprising 45 gene-only predictors, average temperature, average precipitation, and average snowfall, and 79 gene-meteorological interaction terms. The significant genes ranged in functionality from cellular signaling and transport, virulence, metabolism, and stress response, and included gene variables not considered as significant by the baseline model. This study presents a holistic approach towards evaluating multiple data sources (such as genomic and environmental data) to predict outbreak scale, which could help in revising the estimates for human health risk.}, } @article {pmid37375105, year = {2023}, author = {Myintzaw, P and Pennone, V and McAuliffe, O and Begley, M and Callanan, M}, title = {Association of Virulence, Biofilm, and Antimicrobial Resistance Genes with Specific Clonal Complex Types of Listeria monocytogenes.}, journal = {Microorganisms}, volume = {11}, number = {6}, pages = {}, pmid = {37375105}, issn = {2076-2607}, support = {15F604, 2019R495//Department of Agriculture Food and the Marine/ ; }, abstract = {Precise classification of foodborne pathogen Listeria monocytogenes is a necessity in efficient foodborne disease surveillance, outbreak detection, and source tracking throughout the food chain. In this study, a total of 150 L. monocytogenes isolates from various food products, food processing environments, and clinical sources were investigated for variations in virulence, biofilm formation, and the presence of antimicrobial resistance genes based on their Whole-Genome Sequences. 
Clonal complex (CC) determination based on Multi-Locus Sequence Typing (MLST) revealed twenty-eight CC-types including eight isolates representing novel CC-types. The eight isolates comprising the novel CC-types share the majority of the known (cold and acid) stress tolerance genes and are all genetic lineage II, serogroup 1/2a-3a. Pan-genome-wide association analysis by Scoary using Fisher's exact test identified eleven genes specifically associated with clinical isolates. Screening for the presence of antimicrobial and virulence genes using the ABRicate tool uncovered variations in the presence of Listeria Pathogenicity Islands (LIPIs) and other known virulence genes. Specifically, the distributions of actA, ecbA, inlF, inlJ, lapB, LIPI-3, and vip genes across isolates were found to be significantly CC-dependent while the presence of ami, inlF, inlJ, and LIPI-3 was associated with clinical isolates specifically. In addition, Roary-derived phylogenetic grouping based on Antimicrobial-Resistant Genes (AMRs) revealed that the thiol transferase (FosX) gene was present in all lineage I isolates, and the presence of the lincomycin resistance ABC-F-type ribosomal protection protein (lmo0919_fam) was also genetic-lineage-dependent. More importantly, the genes found to be specific to CC-type were consistent when a validation analysis was performed with fully assembled, high-quality complete L. monocytogenes genome sequences (n = 247) extracted from the National Centre for Biotechnology Information (NCBI) microbial genomes database. 
This work highlights the usefulness of MLST-based CC typing using the Whole-Genome Sequence as a tool in classifying isolates.}, } @article {pmid37374997, year = {2023}, author = {Negrete-Paz, AM and Vázquez-Marrufo, G and Gutiérrez-Moraga, A and Vázquez-Garcidueñas, MS}, title = {Pangenome Reconstruction of Mycobacterium tuberculosis as a Guide to Reveal Genomic Features Associated with Strain Clinical Phenotype.}, journal = {Microorganisms}, volume = {11}, number = {6}, pages = {}, pmid = {37374997}, issn = {2076-2607}, support = {PICIR-021//Instituto de Ciencia, Tecnología e Innovación de Michoacán/ ; }, abstract = {Tuberculosis (TB) is one of the leading causes of human deaths worldwide caused by infectious diseases. TB infection by Mycobacterium tuberculosis can occur in the lungs, causing pulmonary tuberculosis (PTB), or in any other organ of the body, resulting in extrapulmonary tuberculosis (EPTB). There is no consensus on the genetic determinants of this pathogen that may contribute to EPTB. In this study, we constructed the M. tuberculosis pangenome and used it as a tool to seek genomic signatures associated with the clinical presentation of TB based on its accessory genome differences. The analysis carried out in the present study includes the raw reads of 490 M. tuberculosis genomes (PTB n = 245, EPTB n = 245) retrieved from public databases that were assembled, as well as ten genomes from Mexican strains (PTB n = 5, EPTB n = 5) that were sequenced and assembled. All genomes were annotated and then used to construct the pangenome with Roary and Panaroo. The pangenome obtained using Roary consisted of 2231 core genes and 3729 accessory genes. On the other hand, the pangenome resulting from Panaroo consisted of 2130 core genes and 5598 accessory genes. Associations between the distribution of accessory genes and the PTB/EPTB phenotypes were examined using the Scoary and Pyseer tools. 
Both tools found a significant association between the hspR, plcD, Rv2550c, pe_pgrs5, pe_pgrs25, and pe_pgrs57 genes and the PTB genotype. In contrast, the deletion of the aceA, esxR, plcA, and ppe50 genes was significantly associated with the EPTB phenotype. Rv1759c and Rv3740 were found to be associated with the PTB phenotype according to Scoary; however, these associations were not observed when using Pyseer. The robustness of the constructed pangenome and the gene-phenotype associations is supported by several factors, including the analysis of a large number of genomes, the inclusion of the same number of PTB/EPTB genomes, and the reproducibility of results thanks to the different bioinformatic tools used. Such characteristics surpass most of previous M. tuberculosis pangenomes. Thus, it can be inferred that the deletion of these genes can lead to changes in the processes involved in stress response and fatty acid metabolism, conferring phenotypic advantages associated with pulmonary or extrapulmonary presentation of TB. This study represents the first attempt to use the pangenome to seek gene-phenotype associations in M. tuberculosis.}, } @article {pmid37374927, year = {2023}, author = {Uljanovas, D and Gölz, G and Fleischmann, S and Kudirkiene, E and Kasetiene, N and Grineviciene, A and Tamuleviciene, E and Aksomaitiene, J and Alter, T and Malakauskas, M}, title = {Genomic Characterization of Arcobacter butzleri Strains Isolated from Various Sources in Lithuania.}, journal = {Microorganisms}, volume = {11}, number = {6}, pages = {}, pmid = {37374927}, issn = {2076-2607}, support = {01KI1712//Federal Ministry of Education and Research/ ; }, abstract = {Arcobacter (A.) butzleri, the most widespread species within the genus Arcobacter, is considered as an emerging pathogen causing gastroenteritis in humans. Here, we performed a comparative genome-wide analysis of 40 A. 
butzleri strains from Lithuania to determine the genetic relationship, pangenome structure, putative virulence, and potential antimicrobial- and heavy-metal-resistance genes. Core genome single nucleotide polymorphism (cgSNP) analysis revealed low within-group variability (≤4 SNPs) between three milk strains (RCM42, RCM65, RCM80) and one human strain (H19). Regardless of the type of input (i.e., cgSNPs, accessory genome, virulome, resistome), these strains showed a recurrent phylogenetic and hierarchical grouping pattern. A. butzleri demonstrated a relatively large and highly variable accessory genome (comprising of 6284 genes with around 50% of them identified as singletons) that only partially correlated to the isolation source. Downstream analysis of the genomes resulted in the detection of 115 putative antimicrobial- and heavy-metal-resistance genes and 136 potential virulence factors that are associated with the induction of infection in host (e.g., cadF, degP, iamA), survival and environmental adaptation (e.g., flagellar genes, CheA-CheY chemotaxis system, urease cluster). This study provides additional knowledge for a better A. butzleri-related risk assessment and highlights the need for further genomic epidemiology studies in Lithuania and other countries.}, } @article {pmid37374141, year = {2023}, author = {Abondio, P and Cilli, E and Luiselli, D}, title = {Human Pangenomics: Promises and Challenges of a Distributed Genomic Reference.}, journal = {Life (Basel, Switzerland)}, volume = {13}, number = {6}, pages = {}, pmid = {37374141}, issn = {2075-1729}, abstract = {A pangenome is a collection of the common and unique genomes that are present in a given species. It combines the genetic information of all the genomes sampled, resulting in a large and diverse range of genetic material. Pangenomic analysis offers several advantages compared to traditional genomic research. 
For example, a pangenome is not bound by the physical constraints of a single genome, so it can capture more genetic variability. Thanks to the introduction of the concept of pangenome, it is possible to use exceedingly detailed sequence data to study the evolutionary history of two different species, or how populations within a species differ genetically. In the wake of the Human Pangenome Project, this review aims at discussing the advantages of the pangenome around human genetic variation, which are then framed around how pangenomic data can inform population genetics, phylogenetics, and public health policy by providing insights into the genetic basis of diseases or determining personalized treatments, targeting the specific genetic profile of an individual. Moreover, technical limitations, ethical concerns, and legal considerations are discussed.}, } @article {pmid37372961, year = {2023}, author = {Abdul Aziz, M and Masmoudi, K}, title = {Insights into the Transcriptomics of Crop Wild Relatives to Unravel the Salinity Stress Adaptive Mechanisms.}, journal = {International journal of molecular sciences}, volume = {24}, number = {12}, pages = {}, pmid = {37372961}, issn = {1422-0067}, support = {12F041//United Arab Emirates University/ ; }, mesh = {*Transcriptome ; *Plant Breeding ; Gene Expression Profiling ; Genomics ; Salt Tolerance/genetics ; Salinity ; }, abstract = {The narrow genomic diversity of modern cultivars is a major bottleneck for enhancing the crop's salinity stress tolerance. The close relatives of modern cultivated plants, crop wild relatives (CWRs), can be a promising and sustainable resource to broaden the diversity of crops. Advances in transcriptomic technologies have revealed the untapped genetic diversity of CWRs that represents a practical gene pool for improving the plant's adaptability to salt stress. Thus, the present study emphasizes the transcriptomics of CWRs for salinity stress tolerance. 
In this review, the impacts of salt stress on the plant's physiological processes and development are overviewed, and the transcription factors (TFs) regulation of salinity stress tolerance is investigated. In addition to the molecular regulation, a brief discussion on the phytomorphological adaptation of plants under saline environments is provided. The study further highlights the availability and use of transcriptomic resources of CWR and their contribution to pangenome construction. Moreover, the utilization of CWRs' genetic resources in the molecular breeding of crops for salinity stress tolerance is explored. Several studies have shown that cytoplasmic components such as calcium and kinases, and ion transporter genes such as Salt Overly Sensitive 1 (SOS1) and High-affinity Potassium Transporters (HKTs) are involved in the signaling of salt stress, and in mediating the distribution of excess Na[+] ions within the plant cells. Recent comparative analyses of transcriptomic profiling through RNA sequencing (RNA-Seq) between the crops and their wild relatives have unraveled several TFs, stress-responsive genes, and regulatory proteins for generating salinity stress tolerance. This review specifies that the use of CWRs transcriptomics in combination with modern breeding experimental approaches such as genomic editing, de novo domestication, and speed breeding can accelerate the CWRs utilization in the breeding programs for enhancing the crop's adaptability to saline conditions. 
The transcriptomic approaches optimize the crop genomes with the accumulation of favorable alleles that will be indispensable for designing salt-resilient crops.}, } @article {pmid37369325, year = {2023}, author = {Thorwall, S and Trivedi, V and Ottum, E and Wheeldon, I}, title = {Population genomics-guided engineering of phenazine biosynthesis in Pseudomonas chlororaphis.}, journal = {Metabolic engineering}, volume = {78}, number = {}, pages = {223--234}, doi = {10.1016/j.ymben.2023.06.008}, pmid = {37369325}, issn = {1096-7184}, mesh = {*Pseudomonas chlororaphis/genetics/metabolism ; Metagenomics ; Genome-Wide Association Study ; Pseudomonas/genetics/metabolism ; Phenazines/metabolism ; Bacterial Proteins/genetics/metabolism ; }, abstract = {The emergence of next-generation sequencing (NGS) technologies has made it possible to not only sequence entire genomes, but also identify metabolic engineering targets across the pangenome of a microbial population. This study leverages NGS data as well as existing molecular biology and bioinformatics tools to identify and validate genomic signatures for improving phenazine biosynthesis in Pseudomonas chlororaphis. We sequenced a diverse collection of 34 Pseudomonas isolates using short- and long-read sequencing techniques and assembled whole genomes using the NGS reads. In addition, we assayed three industrially relevant phenotypes (phenazine production, biofilm formation, and growth temperature) for these isolates in two different media conditions. We then provided the whole genomes and phenazine production data to a unitig-based microbial genome-wide association study (mGWAS) tool to identify novel genomic signatures responsible for phenazine production in P. chlororaphis. Post-processing of the mGWAS analysis results yielded 330 significant hits influencing the biosynthesis of one or more phenazine compounds. 
Based on a quantitative metric (called the phenotype score), we elucidated the most influential hits for phenazine production and experimentally validated them in vivo in the most optimal phenazine producing strain. Two genes significantly increased phenazine-1-carboxamide (PCN) production: a histidine transporter (ProY_1), and a putative carboxypeptidase (PS__04251). A putative MarR-family transcriptional regulator decreased PCN titer when overexpressed in a high PCN producing isolate. Overall, this work seeks to demonstrate the utility of a population genomics approach as an effective strategy in enabling the identification of targets for metabolic engineering of bioproduction hosts.}, } @article {pmid37365340, year = {2023}, author = {Chin, CS and Behera, S and Khalak, A and Sedlazeck, FJ and Sudmant, PH and Wagner, J and Zook, JM}, title = {Multiscale analysis of pangenomes enables improved representation of genomic diversity for repetitive and clinically relevant genes.}, journal = {Nature methods}, volume = {20}, number = {8}, pages = {1213-1221}, pmid = {37365340}, issn = {1548-7105}, support = {R35 GM142916/GM/NIGMS NIH HHS/United States ; R35GM142916/GM/NIGMS NIH HHS/United States ; 1U01HG011758-01/HG/NHGRI NIH HHS/United States ; UM1 HG008898/HG/NHGRI NIH HHS/United States ; U01 HG011758/HG/NHGRI NIH HHS/United States ; }, mesh = {Male ; Humans ; *Genomics ; *Genome, Human ; Major Histocompatibility Complex ; }, abstract = {Advancements in sequencing technologies and assembly methods enable the regular production of high-quality genome assemblies characterizing complex regions. However, challenges remain in efficiently interpreting variation at various scales, from smaller tandem repeats to megabase rearrangements, across many human genomes. We present a PanGenome Research Tool Kit (PGR-TK) enabling analyses of complex pangenome structural and haplotype variation at multiple scales. 
We apply the graph decomposition methods in PGR-TK to the class II major histocompatibility complex demonstrating the importance of the human pangenome for analyzing complicated regions. Moreover, we investigate the Y-chromosome genes, DAZ1/DAZ2/DAZ3/DAZ4, of which structural variants have been linked to male infertility, and X-chromosome genes OPN1LW and OPN1MW linked to eye disorders. We further showcase PGR-TK across 395 complex repetitive medically important genes. This highlights the power of PGR-TK to resolve complex variation in regions of the genome that were previously too complex to analyze.}, } @article {pmid37364097, year = {2023}, author = {Fayyaz, A and Robinson, G and Chang, PL and Bekele, D and Yimer, S and Carrasquilla-Garcia, N and Negash, K and Surendrarao, A and von Wettberg, EJB and Kemal, SA and Tesfaye, K and Fikre, A and Farmer, AD and Cook, DR}, title = {Hiding in plain sight: Genome-wide recombination and a dynamic accessory genome drive diversity in Fusarium oxysporum f.sp. ciceris.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {27}, pages = {e2220570120}, pmid = {37364097}, issn = {1091-6490}, mesh = {*Fusarium/genetics ; Host Specificity ; Genomics ; Agriculture ; Plant Diseases/genetics ; }, abstract = {Understanding the origins of variation in agricultural pathogens is of fundamental interest and practical importance, especially for diseases that threaten food security. Fusarium oxysporum is among the most important of soil-borne pathogens, with a global distribution and an extensive host range. The pathogen is considered to be asexual, with horizontal transfer of chromosomes providing an analog of assortment by meiotic recombination. 
Here, we challenge those assumptions based on the results of population genomic analyses, describing the pathogen's diversity and inferring its origins and functional consequences in the context of a single, long-standing agricultural system. We identify simultaneously low nucleotide distance among strains, and unexpectedly high levels of genetic and genomic variability. We determine that these features arise from a combination of genome-scale recombination, best explained by widespread sexual reproduction, and presence-absence variation consistent with chromosomal rearrangement. Pangenome analyses document an accessory genome more than twice the size of the core genome, with contrasting evolutionary dynamics. The core genome is stable, with low diversity and high genetic differentiation across geographic space, while the accessory genome is paradoxically more diverse and unstable but with lower genetic differentiation and hallmarks of contemporary gene flow at local scales. We suggest a model in which episodic sexual reproduction generates haplotypes that are selected and then maintained through clone-like dynamics, followed by contemporary genomic rearrangements that reassort the accessory genome among sympatric strains. Taken together, these processes contribute unique genome content, including reassortment of virulence determinants that may explain observed variation in pathogenic potential.}, } @article {pmid37361319, year = {2023}, author = {Torres-Morales, J and Mark Welch, JL and Dewhirst, FE and Borisy, GG}, title = {Site-specialization of human oral Gemella species.}, journal = {Journal of oral microbiology}, volume = {15}, number = {1}, pages = {2225261}, pmid = {37361319}, issn = {2000-2297}, abstract = {Gemella species are core members of the human oral microbiome in healthy subjects and are regarded as commensals, although they can cause opportunistic infections. 
Our objective was to evaluate the site-specialization of Gemella species among various habitats within the mouth by combining pangenomics and metagenomics. With pangenomics, we identified genome relationships and categorized genes as core and accessory to each species. With metagenomics, we identified the primary oral habitat of individual genomes. Our results establish that the genomes of three species, G. haemolysans, G. sanguinis and G. morbillorum, are abundant and prevalent in human mouths at different oral sites: G. haemolysans on buccal mucosa and keratinized gingiva; G. sanguinis on tongue dorsum, throat, and tonsils; and G. morbillorum in dental plaque. The gene-level basis of site-specificity was investigated by identifying genes that were core to Gemella genomes at a specific oral site but absent from other Gemella genomes. The riboflavin biosynthesis pathway was present in G. haemolysans genomes associated with buccal mucosa but absent from the rest of the genomes. Overall, metapangenomics show that Gemella species have clear ecological preferences in the oral cavity of healthy humans and provides an approach to identifying gene-level drivers of site specificity.}, } @article {pmid37359562, year = {2023}, author = {Touray, BJB and Hanafy, M and Phanse, Y and Hildebrand, R and Talaat, AM}, title = {Protective RNA nanovaccines against Mycobacterium avium subspecies hominissuis.}, journal = {Frontiers in immunology}, volume = {14}, number = {}, pages = {1188754}, pmid = {37359562}, issn = {1664-3224}, mesh = {Animals ; Mice ; *Mycobacterium avium/physiology ; *Mycobacterium tuberculosis ; Interleukin-2 ; RNA ; RNA, Messenger/genetics ; }, abstract = {The induction of an effective immune response is critical for the success of mRNA-based therapeutics. Here, we developed a nanoadjuvant system compromised of Quil-A and DOTAP (dioleoyl 3 trimethylammonium propane), hence named QTAP, for the efficient delivery of mRNA vaccine constructs into cells. 
Electron microscopy indicated that the complexation of mRNA with QTAP forms nanoparticles with an average size of 75 nm and which have ~90% encapsulation efficiency. The incorporation of pseudouridine-modified mRNA resulted in higher transfection efficiency and protein translation with low cytotoxicity than unmodified mRNA. When QTAP-mRNA or QTAP alone transfected macrophages, pro-inflammatory pathways (e.g., NLRP3, NF-kb, and MyD88) were upregulated, an indication of macrophage activation. In C57Bl/6 mice, QTAP nanovaccines encoding Ag85B and Hsp70 transcripts (QTAP-85B+H70) were able to elicit robust IgG antibody and IFN-γ, TNF-α, IL-2, and IL-17 cytokines responses. Following aerosol challenge with a clinical isolate of M. avium ss. hominissuis (M. ah), a significant reduction of mycobacterial counts was observed in lungs and spleens of only immunized animals at both 4- and 8-weeks post-challenge. As expected, reduced levels of M. ah were associated with diminished histological lesions and robust cell-mediated immunity. Interestingly, polyfunctional T-cells expressing IFN-γ, IL-2, and TNF-α were detected at 8 but not 4 weeks post-challenge. Overall, our analysis indicated that QTAP is a highly efficient transfection agent and could improve the immunogenicity of mRNA vaccines against pulmonary M. 
ah, an infection of significant public health importance, especially to the elderly and to those who are immune compromised.}, } @article {pmid37358412, year = {2023}, author = {Li, T and Huang, J and Yang, S and Chen, J and Yao, Z and Zhong, M and Zhong, X and Ye, X}, title = {Pan-Genome-Wide Association Study of Serotype 19A Pneumococci Identifies Disease-Associated Genes.}, journal = {Microbiology spectrum}, volume = {11}, number = {4}, pages = {e0407322}, pmid = {37358412}, issn = {2165-0497}, mesh = {Humans ; *Streptococcus pneumoniae ; Serogroup ; Genome-Wide Association Study ; *Pneumococcal Infections ; Pneumococcal Vaccines/genetics ; Serotyping ; }, abstract = {Despite the widespread implementation of pneumococcal vaccines, hypervirulent Streptococcus pneumoniae serotype 19A is endemic worldwide. It is still unclear whether specific genetic elements contribute to complex pathogenicity of serotype 19A isolates. We performed a large-scale pan-genome-wide association study (pan-GWAS) of 1,292 serotype 19A isolates sampled from patients with invasive disease and asymptomatic carriers. To address the underlying disease-associated genotypes, a comprehensive analysis using three methods (Scoary, a linear mixed model, and random forest) was performed to compare disease and carriage isolates to identify genes consistently associated with disease phenotype. By using three pan-GWAS methods, we found consensus on statistically significant associations between genotypes and disease phenotypes (disease or carriage), with a subset of 30 consistently significant disease-associated genes. The results of functional annotation revealed that these disease-associated genes had diverse predicted functions, including those that participated in mobile genetic elements, antibiotic resistance, virulence, and cellular metabolism. 
Our findings suggest the multifactorial pathogenicity nature of this hypervirulent serotype and provide important evidence for the design of novel protein-based vaccines to prevent and control pneumococcal disease. IMPORTANCE It is important to understand the genetic and pathogenic characteristics of S. pneumoniae serotype 19A, which may provide important information for the prevention and treatment of pneumococcal disease. This global large-sample pan-GWAS study has identified a subset of 30 consistently significant disease-associated genes that are involved in mobile genetic elements, antibiotic resistance, virulence, and cellular metabolism. These findings suggest the multifactorial pathogenicity nature of hypervirulent S. pneumoniae serotype 19A isolates and provide implications for the design of novel protein-based vaccines.}, } @article {pmid37356834, year = {2023}, author = {Prakash, JAJ and Jacob, JJ and Rachel, T and Vasudevan, K and Amladi, A and Iyadurai, R and Manesh, A and Veeraraghavan, B}, title = {Genomic analysis of Brucella melitensis reveals new insights into phylogeny and evolutionary divergence.}, journal = {Indian journal of medical microbiology}, volume = {44}, number = {}, pages = {100360}, doi = {10.1016/j.ijmmb.2023.02.003}, pmid = {37356834}, issn = {1998-3646}, mesh = {Humans ; *Brucella melitensis/genetics ; Phylogeny ; Genome-Wide Association Study ; *Brucellosis/epidemiology ; Genomics ; Genotype ; }, abstract = {PURPOSE: Brucellosis is a bacterial zoonotic disease caused by genus Brucella. The disease is often transmitted to humans by direct or indirect contact with infected livestock or from laboratory exposure. In this study two clinical isolates of Brucella melitensis were subjected to whole genome sequencing (WGS) using Ion Torrent PGM and Oxford Nanopore MinIon platform.

METHODS: The two hybrid complete genomes were subjected to core gene SNP analysis to identify the relative evolutionary position. To distinguish between the various lineages of B. melitensis, pangenome analysis was carried out.

RESULTS: Phylogenetic analysis revealed that both the study isolates (ST8) clustered along the other Asian isolates that formed genotype II. Genome-wide analyses of 326 B. melitensis isolates suggest 2171 gene clusters were shared across all the genomes while 3552 gene clusters were considered as accessory genes.

CONCLUSION: Here we attempted to provide the gain and loss of six unique genes that defined the phylogenetic lineages and complex evolutionary process. As the severity and prevalence of human brucellosis is increasing a better understanding of Brucella genomics and transmission dynamics is needed.}, } @article {pmid37356030, year = {2023}, author = {Gupta, RK and Tikariha, H and Purohit, HJ and Khardenavis, AA}, title = {Pangenome-driven insights into nitrogen metabolic characteristics of Citrobacter portucalensis strain AAK_AS5 associated with wastewater nitrogen removal.}, journal = {Archives of microbiology}, volume = {205}, number = {7}, pages = {270}, pmid = {37356030}, issn = {1432-072X}, mesh = {*Wastewater ; *Denitrification ; Nitrates ; Ammonia ; Nitrogen/metabolism ; Nitrification ; Citrobacter/genetics/metabolism ; Heterotrophic Processes ; Aerobiosis ; Nitrites/metabolism ; }, abstract = {Nitrogen metabolism in the genus Citrobacter is very poorly studied despite its several implications in wastewater treatment. In the current study, Citrobacter portucalensis strain AAK_AS5 was assessed for remediation of simulated wastewater supplemented with different inorganic nitrogen sources. Combination of (NH4)2SO4 with KNO3 was the most preferred for achieving high growth density followed by (NH4)2SO4 and KNO3 alone. This was in agreement with highest ammonical nitrogen removal of 92.9% in the presence of combined nitrogen sources and the corresponding nitrate nitrogen removal of 93% in the presence of KNO3. Furthermore, these removal capacities were validated by investigating the uniqueness and the spread of metabolic features through pan-genomic approach that revealed the largest number of unique genes (2097) and accessory genes (705) in strain AAK_AS5. 
Of the total 44 different types of nitrogen metabolism-related genes, 39 genes were associated with the core genome, while 5 genes such as gltI, nasA, nasR, nrtA, and ntrC uniquely belonged to the accessory genome. Strain AAK_AS5 possessed three major nitrate removal pathways viz., assimilatory and dissimilatory nitrate reduction to ammonia (ANRA & DNRA), and denitrification; however, the absence of nitrification was compensated by ammonia assimilation catalyzed by gene products of the GDH and GS-GOGAT pathways. narGHIJ encoding the respiratory nitrate reductase was commonly identified in all the studied genomes, while genes such as nirK, norB, and nosZ were uniquely present in the strain AAK_AS5 only. A markedly different genetic content and metabolic diversity between the strains reflected their adaptive evolution in the environment thus highlighting the significance of C. portucalensis AAK_AS5 for potential application in nitrogen removal from wastewater.}, } @article {pmid37354526, year = {2023}, author = {Masutani, B and Suzuki, Y and Suzuki, Y and Morishita, S}, title = {JTK: targeted diploid genome assembler.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {7}, pages = {}, pmid = {37354526}, issn = {1367-4811}, mesh = {*Diploidy ; Sequence Analysis, DNA ; *High-Throughput Nucleotide Sequencing ; Genome ; Genomics ; Haplotypes ; }, abstract = {MOTIVATION: Diploid assembly, or determining sequences of homologous chromosomes separately, is essential to elucidate genetic differences between haplotypes. One approach is to call and phase single nucleotide variants (SNVs) on a reference sequence. However, this approach becomes unstable on large segmental duplications (SDs) or structural variations (SVs) because the alignments of reads deriving from these regions tend to be unreliable. Another approach is to use highly accurate PacBio HiFi reads to output diploid assembly directly. 
Nonetheless, HiFi reads cannot phase homozygous regions longer than their length and require Oxford Nanopore Technologies (ONT) reads or Hi-C to produce a fully phased assembly. Is a single long-read sequencing technology sufficient to create an accurate diploid assembly?

RESULTS: Here, we present JTK, a megabase-scale diploid genome assembler. It first randomly samples kilobase-scale sequences (called 'chunks') from the long reads, phases variants found on them, and produces two haplotypes. The novel idea of JTK is to utilize chunks to capture SNVs and SVs simultaneously. From 60-fold ONT reads on the HG002 and a Japanese sample, it fully assembled two haplotypes with approximately 99.9% accuracy on the histocompatibility complex (MHC) and the leukocyte receptor complex (LRC) regions, which was impossible by the reference-based approach. In addition, in the LRC region on a Japanese sample, JTK output an assembly of better contiguity than those built from high-coverage HiFi+Hi-C. In the coming age of pan-genomics, JTK would complement the reference-based phasing method to assemble the difficult-to-assemble but medically important regions.

JTK is available at https://github.com/ban-m/jtk, and the datasets are available at https://doi.org/10.5281/zenodo.7790310 or JGAS000580 in DDBJ.}, } @article {pmid37353434, year = {2023}, author = {Wang, B and Dang, N and Yang, X and Xu, S and Ye, K}, title = {The human pangenome reference: the beginning of a new era for genomics.}, journal = {Science bulletin}, volume = {68}, number = {14}, pages = {1484-1487}, doi = {10.1016/j.scib.2023.06.014}, pmid = {37353434}, issn = {2095-9281}, mesh = {Humans ; *Genomics ; *Software ; }, } @article {pmid37349950, year = {2023}, author = {Pei, Z and Liu, Y and Yi, Z and Liao, J and Wang, H and Zhang, H and Chen, W and Lu, W}, title = {Diversity within the species Clostridium butyricum: pan-genome, phylogeny, prophage, carbohydrate utilization, and antibiotic resistance.}, journal = {Journal of applied microbiology}, volume = {134}, number = {7}, pages = {}, doi = {10.1093/jambio/lxad127}, pmid = {37349950}, issn = {1365-2672}, support = {32021005//National Natural Science Foundation of China/ ; BK20221070//Natural Science Foundation of Jiangsu Province/ ; }, mesh = {Humans ; *Clostridium butyricum/genetics ; Prophages/genetics ; Phylogeny ; Drug Resistance, Microbial/genetics ; Carbohydrates ; }, abstract = {AIMS: Clostridium butyricum has been recognized as a strong candidate for the "next generation of probiotics" due to its beneficial roles on humans. Because our current understanding of this species is limited, it is imperative to unveil the genetic variety and biological properties of C. butyricum on sufficient strains.

METHODS AND RESULTS: We isolated 53 C. butyricum strains and collected 25 publicly available genomes to comprehensively assess the genomic and phenotypic diversity of this species. Average nucleotide identity and phylogeny suggested that multiple C. butyricum strains might share the same niche. Clostridium butyricum genomes were replete with prophage elements, but the CRISPR-positive strain efficiently inhibited prophage integration. Clostridium butyricum utilizes cellulose, alginate, and soluble starch universally, and shows general resistance to aminoglycoside antibiotics.

CONCLUSIONS: Clostridium butyricum exhibited a broad genetic diversity from the extraordinarily open pan-genome, extremely convergent core genome, and ubiquitous prophages. In carbohydrate utilization and antibiotic resistance, partial genotypes have a certain guiding significance for phenotypes.}, } @article {pmid37349608, year = {2023}, author = {Manivannan, A and Cheeran Amal, T}, title = {Deciphering the complex cotton genome for improving fiber traits and abiotic stress resilience in sustainable agriculture.}, journal = {Molecular biology reports}, volume = {50}, number = {8}, pages = {6937-6953}, pmid = {37349608}, issn = {1573-4978}, mesh = {*Genome, Plant/genetics ; *Plant Breeding ; Polyploidy ; Agriculture ; Stress, Physiological/genetics ; Gossypium/genetics ; Cotton Fiber ; }, abstract = {BACKGROUND: Understanding the complex cotton genome is of paramount importance in devising a strategy for sustainable agriculture. Cotton is probably the most economically important cash crop known for its cellulose-rich fiber content. The cotton genome has become an ideal model for deciphering polyploidization due to its polyploidy, setting it apart from other major crops. However, the main challenge in understanding the functional and regulatory functions of many genes in cotton is still the complex cotton polyploidy genome, which is not limited to a single role. Cotton production is vulnerable to the sensitive effects of climate change, which can alter or aggravate soil, pests, and diseases. Thus, conventional plant breeding coupled with advanced technologies has led to substantial progress being made in cotton production.

GENOMICS APPROACHES IN COTTON: In the frontier areas of genomics research, cotton genomics has gained momentum accomplished by robust high-throughput sequencing platforms combined with novel computational tools to make the cotton genome more tractable. Advances in long-read sequencing have allowed for the generation of the complete set of cotton gene transcripts giving incisive scientific knowledge in cotton improvement. In contrast, the integration of the latest sequencing platforms has been used to generate multiple high-quality reference genomes in diploid and tetraploid cotton. While pan-genome and 3D genomic studies are still in the early stages in cotton, it is anticipated that rapid advances in sequencing, assembly algorithms, and analysis pipelines will have a greater impact on advanced cotton research.

CONCLUSIONS: This review article briefly compiles substantial contributions in different areas of the cotton genome, which include genome sequencing, genes, and their molecular regulatory networks in fiber development and stress tolerance mechanism. This will greatly help us in understanding the robust genomic organization which in turn will help unearth candidate genes for functionally important agronomic traits.}, } @article {pmid37341494, year = {2023}, author = {Potter, RF and Zhang, K and Reimler, B and Marino, J and Muenks, CE and Alvarado, K and Wallace, MA and Westblade, LF and McElvania, E and Yarbrough, ML and Hunstad, DA and Dantas, G and Burnham, CD}, title = {Uncharacterized and lineage-specific accessory genes within the Proteus mirabilis pan-genome landscape.}, journal = {mSystems}, volume = {8}, number = {4}, pages = {e0015923}, pmid = {37341494}, issn = {2379-5077}, support = {R01 AI158418/AI/NIAID NIH HHS/United States ; R01 HS027621/HS/AHRQ HHS/United States ; R01 AI155893/AI/NIAID NIH HHS/United States ; U01 AI123394/AI/NIAID NIH HHS/United States ; }, mesh = {Humans ; *Proteus mirabilis/genetics ; *Proteomics ; Phylogeny ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {Proteus mirabilis is a Gram-negative bacterium recognized for its unique swarming motility and urease activity. A previous proteomic report on four strains hypothesized that, unlike other Gram-negative bacteria, P. mirabilis may not exhibit significant intraspecies variation in gene content. However, there has not been a comprehensive analysis of large numbers of P. mirabilis genomes from various sources to support or refute this hypothesis. We performed comparative genomic analysis on 2,060 Proteus genomes. We sequenced the genomes of 893 isolates recovered from clinical specimens from three large US academic medical centers, combined with 1,006 genomes from NCBI Assembly and 161 genomes assembled from Illumina reads in the public domain. 
We used average nucleotide identity (ANI) to delineate species and subspecies, core genome phylogenetic analysis to identify clusters of highly related P. mirabilis genomes, and pan-genome annotation to identify genes of interest not present in the model P. mirabilis strain HI4320. Within our cohort, Proteus is composed of 10 named species and 5 uncharacterized genomospecies. P. mirabilis can be subdivided into three subspecies; subspecies 1 represented 96.7% (1,822/1,883) of all genomes. The P. mirabilis pan-genome includes 15,399 genes outside of HI4320, and 34.3% (5,282/15,399) of these genes have no putative assigned function. Subspecies 1 is composed of several highly related clonal groups. Prophages and gene clusters encoding putatively extracellular-facing proteins are associated with clonal groups. Uncharacterized genes not present in the model strain P. mirabilis HI4320 but with homology to known virulence-associated operons can be identified within the pan-genome. IMPORTANCE Gram-negative bacteria use a variety of extracellular facing factors to interact with eukaryotic hosts. Due to intraspecies genetic variability, these factors may not be present in the model strain for a given organism, potentially providing incomplete understanding of host-microbial interactions. In contrast to previous reports on P. mirabilis, but similar to other Gram-negative bacteria, P. mirabilis has a mosaic genome with a linkage between phylogenetic position and accessory genome content. P. mirabilis encodes a variety of genes that may impact host-microbe dynamics beyond what is represented in the model strain HI4320. 
The diverse, whole-genome characterized strain bank from this work can be used in conjunction with reverse genetic and infection models to better understand the impact of accessory genome content on bacterial physiology and pathogenesis of infection.}, } @article {pmid37337218, year = {2023}, author = {Smith, TPL and Bickhart, DM and Boichard, D and Chamberlain, AJ and Djikeng, A and Jiang, Y and Low, WY and Pausch, H and Demyda-Peyrás, S and Prendergast, J and Schnabel, RD and Rosen, BD}, title = {The Bovine Pangenome Consortium: democratizing production and accessibility of genome assemblies for global cattle breeds and other bovine species.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {139}, pmid = {37337218}, issn = {1474-760X}, mesh = {Cattle/genetics ; Animals ; *Polymorphism, Single Nucleotide ; *Genomics ; Genome ; }, abstract = {The Bovine Pangenome Consortium (BPC) is an international collaboration dedicated to the assembly of cattle genomes to develop a more complete representation of cattle genomic diversity. The goal of the BPC is to provide genome assemblies and a community-agreed pangenome representation to replace breed-specific reference assemblies for cattle genomics. The BPC invites partners sharing our vision to participate in the production of these assemblies and the development of a common, community-approved, pangenome reference as a public resource for the research community (https://bovinepangenome.github.io/). 
This community-driven resource will provide the context for comparison between studies and the future foundation for cattle genomic selection.}, } @article {pmid37333201, year = {2023}, author = {Tran, TH and Roberts, AQ and F Escapa, I and Gao, W and Segre, JA and Kong, HH and Conlan, S and Kelly, MS and Lemon, KP}, title = {Metabolic capabilities are highly conserved among human nasal-associated Corynebacterium species in pangenomic analyses.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37333201}, support = {K23 AI135090/AI/NIAID NIH HHS/United States ; R01 GM117174/GM/NIGMS NIH HHS/United States ; R35 GM141806/GM/NIGMS NIH HHS/United States ; }, abstract = {UNLABELLED: Corynebacterium species are globally ubiquitous in human nasal microbiota across the lifespan. Moreover, nasal microbiota profiles typified by higher relative abundances of Corynebacterium are often positively associated with health. Among the most common human nasal Corynebacterium species are C. propinquum, C. pseudodiphtheriticum, C. accolens, and C. tuberculostearicum. Based on the prevalence of these species, at least two likely coexist in the nasal microbiota of 82% of adults. To gain insight into the functions of these four species, we identified genomic, phylogenomic, and pangenomic properties and estimated the functional protein repertoire and metabolic capabilities of 87 distinct human nasal Corynebacterium strain genomes: 31 from Botswana and 56 from the U.S. C. pseudodiphtheriticum had geographically distinct clades consistent with localized strain circulation, whereas some strains from the other species had wide geographic distribution across Africa and North America. All four species had similar genomic and pangenomic structures. 
Gene clusters assigned to all COG metabolic categories were overrepresented in the persistent (core) compared to the accessory genome of each species indicating limited strain-level variability in metabolic capacity. Moreover, core metabolic capabilities were highly conserved among the four species indicating limited species-level metabolic variation. Strikingly, strains in the U.S. clade of C. pseudodiphtheriticum lacked genes for assimilatory sulfate reduction present in the Botswanan clade and in the other studied species, indicating a recent, geographically related loss of assimilatory sulfate reduction. Overall, the minimal species and strain variability in metabolic capacity implies coexisting strains might have limited ability to occupy distinct metabolic niches.

IMPORTANCE: Pangenomic analysis with estimation of functional capabilities facilitates our understanding of the full biologic diversity of bacterial species. We performed systematic genomic, phylogenomic, and pangenomic analyses with qualitative estimation of the metabolic capabilities of four common human nasal Corynebacterium species generating a foundational resource. The prevalence of each species in human nasal microbiota is consistent with the common coexistence of at least two species. We identified a notably high level of metabolic conservation within and among species indicating limited options for species to occupy distinct metabolic niches and pointing to the importance of investigating interactions among nasal Corynebacterium species. Comparing strains from two continents, C. pseudodiphtheriticum had restricted geographic strain distribution characterized by an evolutionarily recent loss of assimilatory sulfate reduction in North American strains. Our findings contribute to understanding the functions of Corynebacterium within human nasal microbiota and to evaluating their potential for future use as biotherapeutics.}, } @article {pmid37323942, year = {2023}, author = {Awori, RM and Waturu, CN and Pidot, SJ and Amugune, NO and Bode, HB}, title = {Draft genomes, phylogenomic reconstruction and comparative genome analysis of three Xenorhabdus strains isolated from soil-dwelling nematodes in Kenya.}, journal = {Access microbiology}, volume = {5}, number = {5}, pages = {}, pmid = {37323942}, issn = {2516-8290}, abstract = {As a proven source of potent and selective antimicrobials, Xenorhabdus bacteria are important to an age plagued with difficult-to-treat microbial infections. Yet, only 27 species have been described to date. In this study, a novel Xenorhabdus species was discovered through genomic studies on three isolates from Kenyan soils. 
Soils in Western Kenya were surveyed for steinernematids and Steinernema isolates VH1 and BG5 were recovered from red volcanic loam soils from cultivated land in Vihiga and clay soils from riverine land in Bungoma respectively. From the two nematode isolates, Xenorhabdus sp. BG5 and Xenorhabdus sp. VH1 were isolated. The genomes of these two, plus that of X. griffiniae XN45 - this was previously isolated from Steinernema sp. scarpo that also originated from Kenyan soils - were sequenced and assembled. Nascent genome assemblies of the three isolates were of good quality with over 70 % of their proteome having known functions. These three isolates formed the X. griffiniae clade in a phylogenomic reconstruction of the genus. Their species were delineated using three overall genome relatedness indices: an unnamed species of the genus, Xenorhabdus sp. BG5, X. griffiniae VH1 and X. griffiniae XN45. A pangenome analysis of this clade revealed that over 70 % of species-specific genes encoded unknown functions. Transposases were linked to genomic islands in Xenorhabdus sp. BG5. Thus, overall genome-related indices sufficiently delineated species of two new Xenorhabdus isolates from Kenya, both of which were closely related to X. griffiniae. The functions encoded by most species-specific genes in the X. griffiniae clade remain unknown.}, } @article {pmid37323913, year = {2023}, author = {Huang, W and Hu, S and Zhu, Y and Liu, S and Zhou, X and Fang, Y and Lu, Y and Wang, R}, title = {Metagenomic surveillance and comparative genomic analysis of Chlamydia psittaci in patients with pneumonia.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1157888}, pmid = {37323913}, issn = {1664-302X}, abstract = {Chlamydia psittaci, a strictly intracellular bacterium, is an underestimated etiologic agent leading to infections in a broad range of animals and mild illness or pneumonia in humans. 
In this study, the metagenomes of bronchoalveolar lavage fluids from the patients with pneumonia were sequenced and highly abundant C. psittaci was found. The target-enriched metagenomic reads were recruited to reconstruct draft genomes with more than 99% completeness. Two C. psittaci strains from novel sequence types were detected and these were closely related to the animal-borne isolates derived from the lineages of ST43 and ST28, indicating the zoonotic transmissions of C. psittaci would benefit its prevalence worldwide. Comparative genomic analysis combined with public isolate genomes revealed that the pan-genome of C. psittaci possessed a more stable gene repertoire than those of other extracellular bacteria, with ~90% of the genes per genome being conserved core genes. Furthermore, the evidence for significantly positive selection was identified in 20 virulence-associated gene products, particularly bacterial membrane-embedded proteins and type three secretion machines, which may play important roles in the pathogen-host interactions. This survey uncovered novel strains of C. psittaci causing pneumonia and the evolutionary analysis characterized prominent gene candidates involved in bacterial adaptation to immune pressures. The metagenomic approach is of significance to the surveillance of difficult-to-culture intracellular pathogens and the research into molecular epidemiology and evolutionary biology of C. psittaci.}, } @article {pmid37323667, year = {2023}, author = {Yang, MR and Su, SF and Wu, YW}, title = {Using bacterial pan-genome-based feature selection approach to improve the prediction of minimum inhibitory concentration (MIC).}, journal = {Frontiers in genetics}, volume = {14}, number = {}, pages = {1054032}, pmid = {37323667}, issn = {1664-8021}, abstract = {Background: Predicting the resistance profiles of antimicrobial resistance (AMR) pathogens is becoming more and more important in treating infectious diseases. 
Various attempts have been made to build machine learning models to classify resistant or susceptible pathogens based on either known antimicrobial resistance genes or the entire gene set. However, the phenotypic annotations are translated from minimum inhibitory concentration (MIC), which is the lowest concentration of antibiotic drugs in inhibiting certain pathogenic strains. Since the MIC breakpoints that classify a strain to be resistant or susceptible to specific antibiotic drug may be revised by governing institutes, we refrained from translating these MIC values into the categories "susceptible" or "resistant" but instead attempted to predict the MIC values using machine learning approaches. Results: By applying a machine learning feature selection approach on a Salmonella enterica pan-genome, in which the protein sequences were clustered to identify highly similar gene families, we showed that the selected features (genes) performed better than known AMR genes, and that models built on the selected genes achieved very accurate MIC prediction. Functional analysis revealed that about half of the selected genes were annotated as hypothetical proteins (i.e., with unknown functional roles), and that only a small portion of known AMR genes were among the selected genes, indicating that applying feature selection on the entire gene set has the potential of uncovering novel genes that may be associated with and may contribute to pathogenic antimicrobial resistances. Conclusion: The application of the pan-genome-based machine learning approach was indeed capable of predicting MIC values with very high accuracy. 
The feature selection process may also identify novel AMR genes for inferring bacterial antimicrobial resistance phenotypes.}, } @article {pmid37322453, year = {2023}, author = {Sun, Y and Kou, DR and Li, Y and Ni, JP and Wang, J and Zhang, YM and Wang, QN and Jiang, B and Wang, X and Sun, YX and Xu, XT and Tan, XJ and Zhang, YJ and Kong, XD}, title = {Pan-genome of Citrullus genus highlights the extent of presence/absence variation during domestication and selection.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {332}, pmid = {37322453}, issn = {1471-2164}, support = {31871964,31401753,32100352//National Natural Science Foundation of China/ ; 202003a06020009//Major Science and Technology Projects in Anhui Province/ ; 6111612//Foundation project of Jiangsu Academy of Agricultural Sciences/ ; }, mesh = {*Citrullus/genetics ; Domestication ; Plant Breeding ; Genome, Plant ; Sequence Analysis, DNA ; }, abstract = {The rich genetic diversity in Citrullus lanatus and the other six species in the Citrullus genus provides important sources in watermelon breeding. Here, we present the Citrullus genus pan-genome based on the 400 Citrullus genus resequencing data, showing that 477 Mb contigs and 6249 protein-coding genes were absent in the Citrullus lanatus reference genome. In the Citrullus genus pan-genome, there are a total of 8795 (30.5%) genes that exhibit presence/absence variations (PAVs). Presence/absence variation (PAV) analysis showed that a lot of gene PAV were selected during the domestication and improvement, such as 53 favorable genes and 40 unfavorable genes were identified during the C. mucosospermus to C. lanatus landrace domestication. We also identified 661 resistance gene analogs (RGAs) in the Citrullus genus pan-genome, which contains 90 RGAs (89 variable and 1 core gene) located on the pangenome additional contigs. By gene PAV-based GWAS, 8 gene presence/absence variations were found associated with flesh color. 
Finally, based on the results of gene PAV selection analysis between watermelon populations with different fruit colors, we identified four non-reference candidate genes associated with carotenoid accumulation, which had a significantly higher frequency in the white flesh. These results will provide an important source for watermelon breeding.}, } @article {pmid37318846, year = {2023}, author = {Zang, X and Lv, H and Huang, P and Sun, Z and Gu, C and Ding, W and Jiao, X and Huang, J}, title = {Genomic Insights into Pangenome and Antimicrobial Resistance in Campylobacter spp. Isolated from Chickens at Specific Growth Stages.}, journal = {Foodborne pathogens and disease}, volume = {20}, number = {7}, pages = {303-312}, doi = {10.1089/fpd.2023.0008}, pmid = {37318846}, issn = {1556-7125}, mesh = {Animals ; Chickens ; Anti-Bacterial Agents/pharmacology ; *Campylobacter jejuni ; *Campylobacter Infections/veterinary ; Phylogeny ; Drug Resistance, Bacterial/genetics ; *Campylobacter ; *Anti-Infective Agents ; Genomics ; }, abstract = {Improved understanding of the genetic basis of Campylobacter spp. colonization of poultry at specific growth stage is the key to developing a farm-based strategy to prevent flock colonization. In this study, 39 Campylobacter spp. strains (chicken isolates, n = 29; environmental isolates, n = 10) were collected from six marked chickens at the growth stage from week 7 to week 13. Then, we use comparative genomics techniques to analyze the temporal genomic characteristics of Campylobacter spp. in individual chickens across a production cycle. Genotype, average nucleotide identity (ANI), and phylogenetic trees all indicated the evolutionary relationships between the strains from different sampling weeks. The clustering of isolates was not dependent on sampling time and sample source, indicating that strains could persist over several weeks in a flock. 
Notably, 10 antimicrobial resistance (AMR) genes were identified in the genome of Campylobacter coli isolates, and the genomes of isolates sampled at week 11 harbored fewer AMR genes and insertion sequences (IS) than the isolates from other weeks. Consistent with this, pangenome-wide association analysis demonstrated that gene acquisition and loss could happen at week 11 and week 13. These genes were mainly associated with cell membrane biogenesis, ion metabolism, and DNA replication, suggesting that genomic change may be related to Campylobacter adaptive response. This is a novel study focused on the genetic changes occurring in Campylobacter spp. isolates in a particular space and time; it highlights that accessory genes and AMR genes were overall stable at chicken farm, which will help us understand the survival and the transmission route of Campylobacter spp. better, and have the potential to inform the strategy on the safety control of market-ready chickens.}, } @article {pmid37317256, year = {2023}, author = {Stone, NE and McDonough, RF and Hamond, C and LeCount, K and Busch, JD and Dirsmith, KL and Rivera-Garcia, S and Soltero, F and Arnold, LM and Weiner, Z and Galloway, RL and Schlater, LK and Nally, JE and Sahl, JW and Wagner, DM}, title = {DNA Capture and Enrichment: A Culture-Independent Approach for Characterizing the Genomic Diversity of Pathogenic Leptospira Species.}, journal = {Microorganisms}, volume = {11}, number = {5}, pages = {}, pmid = {37317256}, issn = {2076-2607}, abstract = {Because they are difficult to culture, obtaining genomic information from Leptospira spp. is challenging, hindering the overall understanding of leptospirosis. We designed and validated a culture-independent DNA capture and enrichment system for obtaining Leptospira genomic information from complex human and animal samples. 
It can be utilized with a variety of complex sample types and diverse species as it was designed using the pan-genome of all known pathogenic Leptospira spp. This system significantly increases the proportion of Leptospira DNA contained within DNA extracts obtained from complex samples, oftentimes reaching >95% even when some estimated starting proportions were <1%. Sequencing enriched extracts results in genomic coverage similar to sequenced isolates, thereby enabling enriched complex extracts to be analyzed together with whole genome sequences from isolates, which facilitates robust species identification and high-resolution genotyping. The system is flexible and can be readily updated when new genomic information becomes available. Implementation of this DNA capture and enrichment system will improve efforts to obtain genomic data from unculturable Leptospira-positive human and animal samples. This, in turn, will lead to a better understanding of the overall genomic diversity and gene content of Leptospira spp. 
that cause leptospirosis, aiding epidemiology and the development of improved diagnostics and vaccines.}, } @article {pmid37316739, year = {2023}, author = {Fudge, JB}, title = {Combining 47 human genomes into a single pangenome.}, journal = {Nature biotechnology}, volume = {41}, number = {6}, pages = {766}, doi = {10.1038/s41587-023-01842-4}, pmid = {37316739}, issn = {1546-1696}, } @article {pmid37316654, year = {2023}, author = {Gao, Y and Yang, X and Chen, H and Tan, X and Yang, Z and Deng, L and Wang, B and Kong, S and Li, S and Cui, Y and Lei, C and Wang, Y and Pan, Y and Ma, S and Sun, H and Zhao, X and Shi, Y and Yang, Z and Wu, D and Wu, S and Zhao, X and Shi, B and Jin, L and Hu, Z and {Chinese Pangenome Consortium} and Lu, Y and Chu, J and Ye, K and Xu, S}, title = {A pangenome reference of 36 Chinese populations.}, journal = {Nature}, volume = {619}, number = {7968}, pages = {112-121}, pmid = {37316654}, issn = {1476-4687}, mesh = {Humans ; *East Asian People/classification/genetics ; *Ethnicity/genetics ; *Genome, Human/genetics ; Sequence Analysis, DNA ; Ultraviolet Rays ; *Human Genetics/standards ; *Minority Groups ; Ethnic and Racial Minorities ; Reference Standards ; Haplotypes/genetics ; Euchromatin/genetics ; *Genetic Variation ; Alleles ; DNA Repair/genetics ; Keratins/genetics/metabolism ; Longevity/genetics ; Immunity/genetics ; }, abstract = {Human genomics is witnessing an ongoing paradigm shift from a single reference sequence to a pangenome form, but populations of Asian ancestry are underrepresented. Here we present data from the first phase of the Chinese Pangenome Consortium, including a collection of 116 high-quality and haplotype-phased de novo assemblies based on 58 core samples representing 36 minority Chinese ethnic groups. 
With an average 30.65× high-fidelity long-read sequence coverage, an average contiguity N50 of more than 35.63 megabases and an average total size of 3.01 gigabases, the CPC core assemblies add 189 million base pairs of euchromatic polymorphic sequences and 1,367 protein-coding gene duplications to GRCh38. We identified 15.9 million small variants and 78,072 structural variants, of which 5.9 million small variants and 34,223 structural variants were not reported in a recently released pangenome reference[1]. The Chinese Pangenome Consortium data demonstrate a remarkable increase in the discovery of novel and missing sequences when individuals are included from underrepresented minority ethnic groups. The missing reference sequences were enriched with archaic-derived alleles and genes that confer essential functions related to keratinization, response to ultraviolet radiation, DNA repair, immunological responses and lifespan, implying great potential for shedding new light on human evolution and recovering missing heritability in complex disease mapping.}, } @article {pmid37316594, year = {2023}, author = {}, title = {A pangenome reference representative of 36 minority Chinese ethnic groups.}, journal = {Nature}, volume = {}, number = {}, pages = {}, pmid = {37316594}, issn = {1476-4687}, } @article {pmid37313509, year = {2022}, author = {Reeves, PA and Richards, CM}, title = {A pan-genome data structure induced by pooled sequencing facilitates variant mining in heterogeneous germplasm.}, journal = {Molecular breeding : new strategies in plant improvement}, volume = {42}, number = {7}, pages = {36}, pmid = {37313509}, issn = {1572-9788}, abstract = {UNLABELLED: Valuable genetic variation lies unused in gene banks due to the difficulty of exploiting heterogeneous germplasm accessions. Advances in molecular breeding, including transgenics and genome editing, present the opportunity to exploit hidden sequence variation directly. 
Here we describe the pan-genome data structure induced by whole-genome sequencing of pooled individuals from wild populations of Patellifolia spp., a source of disease resistance genes for the related crop species sugar beet (Beta vulgaris). We represent the pan-genome as a map of reads from pooled sequencing of a heterogeneous population sample to a reference genome, plus a BLAST data base of the mapped reads. We show that this basic data structure can be queried by reference genome position or homology to identify sequence variants present in the wild relative, at genes of agronomic interest in the crop, a process known as allele or variant mining. Further we demonstrate the possibility of cataloging variants in all Patellifolia genomic regions that have corresponding single copy orthologous regions in sugar beet. The data structure, termed a "pooled read archive," can be produced, altered, and queried using standard tools to facilitate discovery of agronomically-important sequence variation.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s11032-022-01308-6.}, } @article {pmid37313015, year = {2022}, author = {Seyum, EG and Bille, NH and Abtew, WG and Munyengwa, N and Bell, JM and Cros, D}, title = {Genomic selection in tropical perennial crops and plantation trees: a review.}, journal = {Molecular breeding : new strategies in plant improvement}, volume = {42}, number = {10}, pages = {58}, pmid = {37313015}, issn = {1572-9788}, abstract = {UNLABELLED: To overcome the multiple challenges currently faced by agriculture, such as climate change and soil deterioration, more efficient plant breeding strategies are required. Genomic selection (GS) is crucial for the genetic improvement of quantitative traits, as it can increase selection intensity, shorten the generation interval, and improve selection accuracy for traits that are difficult to phenotype. Tropical perennial crops and plantation trees are of major economic importance and have consequently been the subject of many GS articles. In this review, we discuss the factors that affect GS accuracy (statistical models, linkage disequilibrium, information concerning markers, relatedness between training and target populations, the size of the training population, and trait heritability) and the genetic gain expected in these species. The impact of GS will be particularly strong in tropical perennial crops and plantation trees as they have long breeding cycles and constrained selection intensity. Future GS prospects are also discussed. High-throughput phenotyping will allow constructing of large training populations and implementing of phenomic selection. Optimized modeling is needed for longitudinal traits and multi-environment trials. The use of multi-omics, haploblocks, and structural variants will enable going beyond single-locus genotype data. 
Innovative statistical approaches, like artificial neural networks, are expected to efficiently handle the increasing amounts of heterogeneous multi-scale data. Targeted recombinations on sites identified from profiles of marker effects have the potential to further increase genetic gain. GS can also aid re-domestication and introgression breeding. Finally, GS consortia will play an important role in making the best of these opportunities.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s11032-022-01326-4.}, } @article {pmid37310928, year = {2023}, author = {Li, R and Gong, M and Zhang, X and Wang, F and Liu, Z and Zhang, L and Yang, Q and Xu, Y and Xu, M and Zhang, H and Zhang, Y and Dai, X and Gao, Y and Zhang, Z and Fang, W and Yang, Y and Fu, W and Cao, C and Yang, P and Ghanatsaman, ZA and Negari, NJ and Nanaei, HA and Yue, X and Song, Y and Lan, X and Deng, W and Wang, X and Pan, C and Xiang, R and Ibeagha-Awemu, EM and Heslop-Harrison, PJS and Rosen, BD and Lenstra, JA and Gan, S and Jiang, Y}, title = {A sheep pangenome reveals the spectrum of structural variations and their effects on tail phenotypes.}, journal = {Genome research}, volume = {33}, number = {3}, pages = {463-477}, pmid = {37310928}, issn = {1549-5469}, mesh = {Animals ; Sheep/genetics ; *Genome-Wide Association Study ; *Tail ; 5' Untranslated Regions ; Alleles ; Phenotype ; }, abstract = {Structural variations (SVs) are a major contributor to genetic diversity and phenotypic variations, but their prevalence and functions in domestic animals are largely unexplored. Here we generated high-quality genome assemblies for 15 individuals from genetically diverse sheep breeds using Pacific Biosciences (PacBio) high-fidelity sequencing, discovering 130.3 Mb nonreference sequences, from which 588 genes were annotated. A total of 149,158 biallelic insertions/deletions, 6531 divergent alleles, and 14,707 multiallelic variations with precise breakpoints were discovered. The SV spectrum is characterized by an excess of derived insertions compared to deletions (94,422 vs. 33,571), suggesting recent active LINE expansions in sheep. Nearly half of the SVs display low to moderate linkage disequilibrium with surrounding single-nucleotide polymorphisms (SNPs) and most SVs cannot be tagged by SNP probes from the widely used ovine 50K SNP chip. 
We identified 865 population-stratified SVs including 122 SVs possibly derived in the domestication process among 690 individuals from sheep breeds worldwide. A novel 168-bp insertion in the 5' untranslated region (5' UTR) of HOXB13 is found at high frequency in long-tailed sheep. Further genome-wide association study and gene expression analyses suggest that this mutation is causative for the long-tail trait. In summary, we have developed a panel of high-quality de novo assemblies and present a catalog of structural variations in sheep. Our data capture abundant candidate functional variations that were previously unexplored and provide a fundamental resource for understanding trait biology in sheep.}, } @article {pmid37303795, year = {2023}, author = {Zhang, Z and Cui, M and Chen, P and Li, J and Mao, Z and Mao, Y and Li, Z and Guo, Q and Wang, C and Liao, X and Liu, H}, title = {Insight into the phylogeny and metabolic divergence of Monascus species (M. pilosus, M. ruber, and M. purpureus) at the genome level.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1199144}, pmid = {37303795}, issn = {1664-302X}, abstract = {BACKGROUND: Species of the genus Monascus are economically important and widely used in the production of food colorants and monacolin K. However, they have also been known to produce the mycotoxin citrinin. Currently, taxonomic knowledge of this species at the genome level is insufficient.

METHODS: This study presents genomic similarity analyses through the analysis of the average nucleic acid identity of the genomic sequence and the whole genome alignment. Subsequently, the study constructed a pangenome of Monascus by reannotating all the genomes and identifying a total of 9,539 orthologous gene families. Two phylogenetic trees were constructed based on 4,589 single copy orthologous protein sequences and all the 5,565 orthologous proteins, respectively. In addition, carbohydrate active enzymes, secretome, allergic proteins, as well as secondary metabolite gene clusters were compared among the included 15 Monascus strains.

RESULTS: The results clearly revealed a high homology between M. pilosus and M. ruber, and their distant relationship with M. purpureus. Accordingly, all the included 15 Monascus strains should be classified into two distinctly evolutionary clades, namely the M. purpureus clade and the M. pilosus-M. ruber clade. Moreover, gene ontology enrichment showed that the M. pilosus-M. ruber clade had more orthologous genes involved with environmental adaptation than the M. purpureus clade. Compared to Aspergillus oryzae, all the Monascus species had a substantial gene loss of carbohydrate active enzymes. Potential allergenic and fungal virulence factor proteins were also found in the secretome of Monascus. Furthermore, this study identified the pigment synthesis gene clusters present in all included genomes, but with multiple nonessential genes inserted in the gene cluster of M. pilosus and M. ruber compared to M. purpureus. The citrinin gene cluster was found to be intact and highly conserved only among M. purpureus genomes. The monacolin K gene cluster was found only in the genomes of M. pilosus and M. ruber, but the sequence was more conserved in M. ruber.

CONCLUSION: This study provides a paradigm for phylogenetic analysis of the genus Monascus, and it is believed that this report will lead to a better understanding of these food microorganisms in terms of classification, metabolic differentiation, and safety.}, } @article {pmid37298462, year = {2023}, author = {Wekesa, C and Kiprotich, K and Okoth, P and Asudi, GO and Muoma, JO and Furch, ACU and Oelmüller, R}, title = {Molecular Characterization of Indigenous Rhizobia from Kenyan Soils Nodulating with Common Beans.}, journal = {International journal of molecular sciences}, volume = {24}, number = {11}, pages = {}, pmid = {37298462}, issn = {1422-0067}, mesh = {*Rhizobium/genetics ; Kenya ; *Phaseolus/microbiology ; Soil ; Symbiosis/genetics ; Nitrogen ; }, abstract = {Kenya is the seventh most prominent producer of common beans globally and the second leading producer in East Africa. However, the annual national productivity is low due to insufficient quantities of vital nutrients and nitrogen in the soils. Rhizobia are symbiotic bacteria that fix nitrogen through their interaction with leguminous plants. Nevertheless, inoculating beans with commercial rhizobia inoculants results in sparse nodulation and low nitrogen supply to the host plants because these strains are poorly adapted to the local soils. Several studies describe native rhizobia with much better symbiotic capabilities than commercial strains, but only a few have conducted field studies. This study aimed to test the competence of new rhizobia strains that we isolated from Western Kenya soils and for which the symbiotic efficiency was successfully determined in greenhouse experiments. Furthermore, we present and analyze the whole-genome sequence for a promising candidate for agricultural application, which has high nitrogen fixation features and promotes common bean yields in field studies. 
Plants inoculated with the rhizobial isolate S3 or with a consortium of local isolates (COMB), including S3, produced a significantly higher number of seeds and seed dry weight when compared to uninoculated control plants at two study sites. The performance of plants inoculated with commercial isolate CIAT899 was not significantly different from uninoculated plants (p > 0.05), indicating tight competition from native rhizobia for nodule occupancy. Pangenome analysis and the overall genome-related indices showed that S3 is a member of R. phaseoli. However, synteny analysis revealed significant differences in the gene order, orientation, and copy numbers between S3 and the reference R. phaseoli. Isolate S3 is phylogenomically similar to R. phaseoli. However, it has undergone significant genome rearrangements (global mutagenesis) to adapt to harsh conditions in Kenyan soils. Its high nitrogen fixation ability shows optimal adaptation to Kenyan soils, and the strain can potentially replace nitrogenous fertilizer application. We recommend that extensive fieldwork in other parts of the country over a period of five years be performed on S3 to check on how the yield changes with varying weather conditions.}, } @article {pmid37296461, year = {2023}, author = {Schmidt, S and Khan, S and Alanko, JN and Pibiri, GE and Tomescu, AI}, title = {Matchtigs: minimum plain text representation of k-mer sets.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {136}, pmid = {37296461}, issn = {1474-760X}, support = {851093//H2020 European Research Council/ ; 322595//Academy of Finland/ ; 328877//Academy of Finland/ ; 101006879//Horizon 2020 Framework Programme/ ; }, mesh = {*Software ; Sequence Analysis, DNA ; *Algorithms ; Bacteria ; }, abstract = {We propose a polynomial algorithm computing a minimum plain-text representation of k-mer sets, as well as an efficient near-minimum greedy heuristic. 
When compressing read sets of large model organisms or bacterial pangenomes, with only a minor runtime increase, we shrink the representation by up to 59% over unitigs and 26% over previous work. Additionally, the number of strings is decreased by up to 97% over unitigs and 90% over previous work. Finally, a small representation has advantages in downstream applications, as it speeds up SSHash-Lite queries by up to 4.26× over unitigs and 2.10× over previous work.}, } @article {pmid37291196, year = {2023}, author = {He, Q and Tang, S and Zhi, H and Chen, J and Zhang, J and Liang, H and Alam, O and Li, H and Zhang, H and Xing, L and Li, X and Zhang, W and Wang, H and Shi, J and Du, H and Wu, H and Wang, L and Yang, P and Xing, L and Yan, H and Song, Z and Liu, J and Wang, H and Tian, X and Qiao, Z and Feng, G and Guo, R and Zhu, W and Ren, Y and Hao, H and Li, M and Zhang, A and Guo, E and Yan, F and Li, Q and Liu, Y and Tian, B and Zhao, X and Jia, R and Feng, B and Zhang, J and Wei, J and Lai, J and Jia, G and Purugganan, M and Diao, X}, title = {A graph-based genome and pan-genome variation of the model plant Setaria.}, journal = {Nature genetics}, volume = {55}, number = {7}, pages = {1232-1242}, pmid = {37291196}, issn = {1546-1718}, mesh = {Chromosome Mapping ; *Setaria Plant/genetics/metabolism ; Plant Breeding ; Phenotype ; Quantitative Trait Loci ; Genome, Plant/genetics ; Phylogeny ; Plant Proteins/genetics ; }, abstract = {Setaria italica (foxtail millet), a founder crop of East Asian agriculture, is a model plant for C4 photosynthesis and developing approaches to adaptive breeding across multiple climates. Here we established the Setaria pan-genome by assembling 110 representative genomes from a worldwide collection. The pan-genome is composed of 73,528 gene families, of which 23.8%, 42.9%, 29.4% and 3.9% are core, soft core, dispensable and private genes, respectively; 202,884 nonredundant structural variants were also detected. 
The characterization of pan-genomic variants suggests their importance during foxtail millet domestication and improvement, as exemplified by the identification of the yield gene SiGW3, where a 366-bp presence/absence promoter variant accompanies gene expression variation. We developed a graph-based genome and performed large-scale genetic studies for 68 traits across 13 environments, identifying potential genes for millet improvement at different geographic sites. These can be used in marker-assisted breeding, genomic selection and genome editing to accelerate crop improvement under different climatic conditions.}, } @article {pmid37291142, year = {2023}, author = {Eché, C and Iampietro, C and Birbes, C and Dréau, A and Kuchly, C and Di Franco, A and Klopp, C and Faraut, T and Djebali, S and Castinel, A and Zytnicki, M and Denis, E and Boussaha, M and Grohs, C and Boichard, D and Gaspin, C and Milan, D and Donnadieu, C}, title = {A Bos taurus sequencing methods benchmark for assembly, haplotyping, and variant calling.}, journal = {Scientific data}, volume = {10}, number = {1}, pages = {369}, pmid = {37291142}, issn = {2052-4463}, mesh = {Animals ; Cattle ; Female ; Benchmarking ; Genome ; *Genomics ; *High-Throughput Nucleotide Sequencing ; Sequence Analysis, DNA ; }, abstract = {Inspired by the production of reference data sets in the Genome in a Bottle project, we sequenced one Charolais heifer with different technologies: Illumina paired-end, Oxford Nanopore, Pacific Biosciences (HiFi and CLR), 10X Genomics linked-reads, and Hi-C. In order to generate haplotypic assemblies, we also sequenced both parents with short reads. From these data, we built two haplotyped trio high quality reference genomes and a consensus assembly, using up-to-date software packages. The assemblies obtained using PacBio HiFi reaches a size of 3.2 Gb, which is significantly larger than the 2.7 Gb ARS-UCD1.2 reference. 
The BUSCO score of the consensus assembly reaches a completeness of 95.8%, among highly conserved mammal genes. We also identified 35,866 structural variants larger than 50 base pairs. This assembly is a contribution to the bovine pangenome for the "Charolais" breed. These datasets will prove to be useful resources enabling the community to gain additional insight on sequencing technologies for applications such as SNP, indel or structural variant calling, and de novo assembly.}, } @article {pmid37289488, year = {2023}, author = {Mossop, M and Robinson, L and Jiang, JH and Peleg, AY and Blakeway, LV and Macesic, N and Perry, A and Bourke, S and Ulhuq, FR and Palmer, T}, title = {Characterisation of key genotypic and phenotypic traits of clinical cystic fibrosis Staphylococcus aureus isolates.}, journal = {Journal of medical microbiology}, volume = {72}, number = {6}, pages = {}, doi = {10.1099/jmm.0.001703}, pmid = {37289488}, issn = {1473-5644}, mesh = {Animals ; Sheep ; Staphylococcus aureus ; *Cystic Fibrosis/complications/microbiology ; *Coinfection/microbiology ; Agar ; Phenotype ; *Staphylococcal Infections/microbiology ; Anti-Bacterial Agents/pharmacology ; }, abstract = {Introduction. One third of people with CF in the UK are co-infected by both Staphylococcus aureus and Pseudomonas aeruginosa. Chronic bacterial infection in CF contributes to the gradual destruction of lung tissue, and eventually respiratory failure in this group. Gap Statement. The contribution of S. aureus to cystic fibrosis (CF) lung decline in the presence or absence of P. aeruginosa is unclear. Defining the molecular and phenotypic characteristics of a range of S. aureus clinical isolates will help further understand its pathogenic capabilities. Aim. Our objective was to use molecular and phenotypic tools to characterise twenty-five clinical S. aureus isolates collected from mono- and coinfection with P. 
aeruginosa from people with CF at the Royal Victoria Infirmary, Newcastle upon Tyne. Methodology. Genomic DNA was extracted and sequenced. Multilocus sequence typing was used to construct phylogeny from the seven housekeeping genes. A pangenome was calculated using Roary, and Clusters of Orthologous Groups were assigned using eggNOG-mapper which were used to determine differences within core, accessory, and unique genomes. Characterisation of sequence type, clonal complex, agr and spa types was carried out using PubMLST, eBURST, AgrVATE and spaTyper, respectively. Antibiotic resistance was determined using Kirby-Bauer disc diffusion tests. Phenotypic testing of haemolysis was carried out using ovine red blood cell agar plates and mucoid phenotypes visualised using Congo red agar. Results. Clinical strains clustered closely based on agr type, sequence type and clonal complex. COG analysis revealed statistically significant enrichment of COG families between core, accessory and unique pangenome groups. The unique genome was significantly enriched for replication, recombination and repair, and defence mechanisms. The presence of known virulence genes and toxins were high within this group, and unique genes were identified in 11 strains. Strains which were isolated from the same patient all surpassed average nucleotide identity thresholds, however, differed in phenotypic traits. Antimicrobial resistance to macrolides was significantly higher in the coinfection group. Conclusion. There is huge variation in genetic and phenotypic capabilities of S. aureus strains. 
Further studies on how these may differ in relation to other species in the CF lung may give insight into inter-species interactions.}, } @article {pmid37285390, year = {2023}, author = {Rubin, JD and Vogel, NA and Gopalakrishnan, S and Sackett, PW and Renaud, G}, title = {HaploCart: Human mtDNA haplogroup classification using a pangenomic reference graph.}, journal = {PLoS computational biology}, volume = {19}, number = {6}, pages = {e1011148}, pmid = {37285390}, issn = {1553-7358}, mesh = {Humans ; *DNA, Mitochondrial/genetics ; Bayes Theorem ; Haplotypes/genetics ; *Mitochondria/genetics ; Mutation ; }, abstract = {Current mitochondrial DNA (mtDNA) haplogroup classification tools map reads to a single reference genome and perform inference based on the detected mutations to this reference. This approach biases haplogroup assignments towards the reference and prohibits accurate calculations of the uncertainty in assignment. We present HaploCart, a probabilistic mtDNA haplogroup classifier which uses a pangenomic reference graph framework together with principles of Bayesian inference. We demonstrate that our approach significantly outperforms available tools by being more robust to lower coverage or incomplete consensus sequences and producing phylogenetically-aware confidence scores that are unbiased towards any haplogroup. HaploCart is available both as a command-line tool and through a user-friendly web interface. The C++ program accepts as input consensus FASTA, FASTQ, or GAM files, and outputs a text file with the haplogroup assignments of the samples along with the level of confidence in the assignments. 
Our work considerably reduces the amount of data required to obtain a confident mitochondrial haplogroup assignment.}, } @article {pmid37285209, year = {2023}, author = {Liu, R and Ma, L and Wang, H and Liu, D and Lu, X and Huang, X and Huang, S and Liu, X}, title = {Comparative genomics reveals intraspecific divergence of Acidithiobacillus ferrooxidans: insights from evolutionary adaptation.}, journal = {Microbial genomics}, volume = {9}, number = {6}, pages = {}, pmid = {37285209}, issn = {2057-5858}, mesh = {*Acidithiobacillus/genetics/metabolism ; Genomics/methods ; Iron/metabolism ; Adaptation, Physiological/genetics ; }, abstract = {Acidithiobacillus ferrooxidans serves as a model chemolithoautotrophic organism in extremely acidic environments, which has attracted much attention due to its unique metabolism and strong adaptability. However, little was known about the divergences along the evolutionary process based on whole genomes. Herein, we isolated six strains of A. ferrooxidans from mining areas in China and Zambia, and used comparative genomics to investigate the intra-species divergences. The results indicated that A. ferrooxidans diverged into three groups from a common ancestor, and the pan-genome is 'open'. The ancestral reconstruction of A. ferrooxidans indicated that genome sizes experienced a trend of increase in the very earliest days before a decreasing tendency during the evolutionary process, suggesting that both gene gain and gene loss played crucial roles in A. ferrooxidans genome flexibility. Meanwhile, 23 single-copy orthologous groups (OGs) were under positive selection. The differences of rusticyanin (Rus) sequences (the key protein in the iron oxidation pathway) and type IV secretion system (T4SS) composition in the A. ferrooxidans were both related to their group divergences, which contributed to their intraspecific diversity. This study improved our understanding of the divergent evolution and environmental adaptation of A. 
ferrooxidans at the genome level in extreme conditions, which provided theoretical support for the survival mechanism of living creatures at the extreme.}, } @article {pmid37278719, year = {2023}, author = {Noll, N and Molari, M and Shaw, LP and Neher, RA}, title = {PanGraph: scalable bacterial pan-genome graph construction.}, journal = {Microbial genomics}, volume = {9}, number = {6}, pages = {}, pmid = {37278719}, issn = {2057-5858}, support = {/WT_/Wellcome Trust/United Kingdom ; 220422/Z/20/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {*Genomics ; *Genome, Bacterial ; }, abstract = {The genomic diversity of microbes is commonly parameterized as SNPs relative to a reference genome of a well-characterized, but arbitrary, isolate. However, any reference genome contains only a fraction of the microbial pangenome, the total set of genes observed in a given species. Reference-based approaches are thus blind to the dynamics of the accessory genome, as well as variation within gene order and copy number. With the widespread usage of long-read sequencing, the number of high-quality, complete genome assemblies has increased dramatically. In addition to pangenomic approaches that focus on the variation in the sets of genes present in different genomes, complete assemblies allow investigations of the evolution of genome structure and gene order. This latter problem, however, is computationally demanding with few tools available that shed light on these dynamics. Here, we present PanGraph, a Julia-based library and command line interface for aligning whole genomes into a graph. Each genome is represented as a path along vertices, which in turn encapsulate homologous multiple sequence alignments. 
The resultant data structure succinctly summarizes population-level nucleotide and structural polymorphisms and can be exported into several common formats for either downstream analysis or immediate visualization.}, } @article {pmid37275147, year = {2023}, author = {Salvà-Serra, F and Pérez-Pantoja, D and Donoso, RA and Jaén-Luchoro, D and Fernández-Juárez, V and Engström-Jakobsson, H and Moore, ERB and Lalucat, J and Bennasar-Figueras, A}, title = {Comparative genomics of Stutzerimonas balearica (Pseudomonas balearica): diversity, habitats, and biodegradation of aromatic compounds.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1159176}, pmid = {37275147}, issn = {1664-302X}, abstract = {Stutzerimonas balearica (Pseudomonas balearica) has been found principally in oil-polluted environments. The capability of S. balearica to thrive from the degradation of pollutant compounds makes it a species of interest for potential bioremediation applications. However, little has been reported about the diversity of S. balearica. In this study, genome sequences of S. balearica strains from different origins were analyzed, revealing that it is a diverse species with an open pan-genome that will continue revealing new genes and functionalities as the genomes of more strains are sequenced. The nucleotide signatures and intra- and inter-species variation of the 16S rRNA genes of S. balearica were reevaluated. A strategy of screening 16S rRNA gene sequences in public databases enabled the detection of 158 additional strains, of which only 23% were described as S. balearica. The species was detected from a wide range of environments, although mostly from aquatic and polluted environments, predominantly related to petroleum oil. Genomic and phenotypic analyses confirmed that S. balearica possesses varied inherent capabilities for aromatic compounds degradation. This study increases the knowledge of the biology and diversity of S. 
balearica and will serve as a basis for future work with the species.}, } @article {pmid37274318, year = {2023}, author = {Lopez, MES and Gontijo, MTP and Cardoso, RR and Batalha, LS and Eller, MR and Bazzolli, DMS and Vidigal, PMP and Mendonça, RCS}, title = {Complete genome analysis of Tequatrovirus ufvareg1, a Tequatrovirus species inhibiting Escherichia coli O157:H7.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1178248}, pmid = {37274318}, issn = {2235-2988}, mesh = {Humans ; *Escherichia coli O157/genetics ; *Bacteriophages/genetics ; Genome ; Genomics ; Base Sequence ; }, abstract = {INTRODUCTION: Bacteriophages infecting human pathogens have been considered potential biocontrol agents, and studying their genetic content is essential to their safe use in the food industry. Tequatrovirus ufvareg1 is a bacteriophage named UFV-AREG1, isolated from cowshed wastewater and previously tested for its ability to inhibit Escherichia coli O157:H7.

METHODS: T. ufvareg1 was previously isolated using E. coli O157:H7 (ATCC 43895) as a bacterial host. The same strain was used for bacteriophage propagation and the one-step growth curve. The genome of the T. ufvareg1 was sequenced using 305 Illumina HiSeq, and the genome comparison was calculated by VIRIDIC and VIPTree.

RESULTS: Here, we characterize its genome and compare it to other Tequatrovirus. T. ufvareg1 virions have an icosahedral head (114 x 86 nm) and a contracted tail (117 x 23 nm), with a latent period of 25 min, and an average burst size of 18 phage particles per infected E. coli cell. The genome of the bacteriophage T. ufvareg1 contains 268 coding DNA sequences (CDS) and ten tRNA genes distributed in both negative and positive strands. T. ufvareg1 genome also contains 40 promoters on its regulatory regions and two rho-independent terminators. T. ufvareg1 shares an average intergenomic similarity (VIRIDIC) of 88.77% and an average genomic similarity score (VipTree) of 88.91% with eighty-four reference genomes for Tequatrovirus available in the NCBI RefSeq database. The pan-genomic analysis confirmed the high conservation of Tequatrovirus genomes. Among all CDS annotated in the T. ufvareg1 genome, there are 123 core genes, 38 softcore genes, 94 shell genes, and 13 cloud genes. None of 268 CDS was classified as being exclusive of T. ufvareg1.

CONCLUSION: The results in this paper, combined with other previously published findings, indicate that T. ufvareg1 bacteriophage is a potential candidate for food protection against E. coli O157:H7 in foods.}, } @article {pmid37267130, year = {2023}, author = {van den Brandt, AVD and Jonkheer, EM and van Workum, DM and van de Wetering, H and Smit, S and Vilanova, A}, title = {PanVA: Pangenomic Variant Analysis.}, journal = {IEEE transactions on visualization and computer graphics}, volume = {PP}, number = {}, pages = {}, doi = {10.1109/TVCG.2023.3282364}, pmid = {37267130}, issn = {1941-0506}, abstract = {Genomics researchers increasingly use multiple reference genomes to comprehensively explore genetic variants underlying differences in detectable characteristics between organisms. Pangenomes allow for an efficient data representation of multiple related genomes and their associated metadata. However, current visual analysis approaches for exploring these complex genotype-phenotype relationships are often based on single reference approaches or lack adequate support for interpreting the variants in the genomic context with heterogeneous (meta)data. This design study introduces PanVA, a visual analytics design for pangenomic variant analysis developed with the active participation of genomics researchers. The design uniquely combines tailored visual representations with interactions such as sorting, grouping, and aggregation, allowing users to navigate and explore different perspectives on complex genotype-phenotype relations. 
Through evaluation in the context of plants and pathogen research, we show that PanVA helps researchers explore variants in genes and generate hypotheses about their role in phenotypic variation.}, } @article {pmid37261234, year = {2023}, author = {Fatima, S and Ishaq, Z and Irfan, M and AlAsmari, AF and Achakzai, JK and Zaheer, T and Ali, A and Akbar, A}, title = {Whole-genome sequencing of multidrug resistance Salmonella Typhi clinical strains isolated from Balochistan, Pakistan.}, journal = {Frontiers in public health}, volume = {11}, number = {}, pages = {1151805}, pmid = {37261234}, issn = {2296-2565}, mesh = {Humans ; *Salmonella typhi/genetics ; Multilocus Sequence Typing ; Pakistan ; *Anti-Bacterial Agents/pharmacology ; Virulence Factors/genetics ; Whole Genome Sequencing ; Drug Resistance, Multiple ; }, abstract = {INTRODUCTION: Salmonella enterica serovar Typhi (S. Typhi) is a major cause of morbidity and mortality in developing countries, contributing significantly to the global disease burden.

METHODS: In this study, S. Typhi strains were isolated from 100 patients exhibiting symptoms of typhoid fever at a tertiary care hospital in Pakistan. Antimicrobial testing of all isolates was performed to determine the sensitivity and resistance pattern. Three MDR strains, namely QS194, QS430, and QS468, were subjected to whole genome sequencing for genomic characterization.

RESULTS AND DISCUSSION: MLST analysis showed that QS194 belonged to ST19, which is commonly associated with Salmonella enterica serovar Typhimurium. In contrast, QS430 and QS468 belonged to ST1, a sequence type frequently associated with S. Typhi. PlasmidFinder identified the presence of IncFIB(S) and IncFII(S) plasmids in QS194, while IncQ1 was found in QS468. No plasmid was detected in QS430. CARD-based analysis showed that the strains were largely resistant to a variety of antibiotics and disinfecting agents/antiseptics, including fluoroquinolones, cephalosporins, monobactams, cephamycins, penams, phenicols, tetracyclines, rifamycins, aminoglycosides, etc. The S. Typhi strains possessed various virulence factors, such as Vi antigen, Agf/Csg, Bcf, Fim, Pef, etc. The sequencing data indicated that the strains had antibiotic resistance determinants and shared common virulence factors. Pangenome analysis of the selected S. Typhi strains identified 13,237 genes, with 3,611 being core genes, 2,093 shell genes, and 7,533 cloud genes. Genome-based typing and horizontal gene transfer analysis revealed that the strains had different evolutionary origins and may have adapted to distinct environments or host organisms. These findings provide important insights into the genetic characteristics of S. 
Typhi strains and their potential association with various ecological niches and host organisms.}, } @article {pmid37258301, year = {2023}, author = {Ahmed, O and Rossi, M and Boucher, C and Langmead, B}, title = {Efficient taxa identification using a pangenome index.}, journal = {Genome research}, volume = {33}, number = {7}, pages = {1069-1077}, doi = {10.1101/gr.277642.123}, pmid = {37258301}, issn = {1549-5469}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; R35 GM139602/GM/NIGMS NIH HHS/United States ; T32 GM119998/GM/NIGMS NIH HHS/United States ; }, mesh = {*Algorithms ; Sequence Analysis ; *Bacteria/genetics ; }, abstract = {Tools that classify sequencing reads against a database of reference sequences require efficient index data-structures. The r-index is a compressed full-text index that answers substring presence/absence, count, and locate queries in space proportional to the amount of distinct sequence in the database: $O(r)$ space, where r is the number of Burrows-Wheeler runs. To date, the r-index has lacked the ability to quickly classify matches according to which reference sequences (or sequence groupings, i.e., taxa) a match overlaps. We present new algorithms and methods for solving this problem. Specifically, given a collection D of d documents, $D = \lbrace T_1, T_2, \ldots, T_d \rbrace$ over an alphabet of size σ, we extend the r-index with $O(rd)$ additional words to support document listing queries for a pattern $S[1..m]$ that occurs in $\mathit{ndoc}$ documents in D in $O(m \log\log_w \sigma + \mathit{ndoc})$ time and $O(rd)$ space, where w is the machine word size. Applied in a bacterial mock community experiment, our method is up to three times faster than a comparable method that uses the standard r-index locate queries. We show that our method classifies both simulated and real nanopore reads at the strain level with higher accuracy compared with other approaches. 
Finally, we present strategies for compacting this structure in applications in which read lengths or match lengths can be bounded.}, } @article {pmid37256057, year = {2023}, author = {Zhao, W and Zeng, W and Pang, B and Luo, M and Peng, Y and Xu, J and Kan, B and Li, Z and Lu, X}, title = {Oxford nanopore long-read sequencing enables the generation of complete bacterial and plasmid genomes without short-read sequencing.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1179966}, pmid = {37256057}, issn = {1664-302X}, abstract = {INTRODUCTION: Genome-based analysis is crucial in monitoring antibiotic-resistant bacteria (ARB) and antibiotic-resistance genes (ARGs). Short-read sequencing is typically used to obtain incomplete draft genomes, while long-read sequencing can obtain genomes of multidrug resistance (MDR) plasmids and track the transmission of plasmid-borne antimicrobial resistance genes in bacteria. However, long-read sequencing suffers from low-accuracy base calling, and short-read sequencing is often required to improve genome accuracy. This increases costs and turnaround time.

METHODS: In this study, a novel ONT sequencing method is described, which uses the latest ONT chemistry with improved accuracy to assemble genomes of MDR strains and plasmids from long-read sequencing data only. Three strains of Salmonella carrying MDR plasmids were sequenced using the ONT SQK-LSK114 kit with flow cell R10.4.1, and de novo genome assembly was performed with average read accuracy (Q > 10) of 98.9%.

RESULTS AND DISCUSSION: For a 5-Mb-long bacterial genome, finished genome sequences with accuracy of >99.99% could be obtained at 75× sequencing coverage depth using Flye and Medaka software. Thus, this new ONT method greatly improves base-calling accuracy, allowing for the de novo assembly of high-quality finished bacterial or plasmid genomes without the need for short-read sequencing. This saves both money and time and supports the application of ONT data in critical genome-based epidemiological analyses. The novel ONT approach described in this study can take the place of traditional combination genome assembly based on short- and long-read sequencing, enabling pangenomic analyses based on high-quality complete bacterial and plasmid genomes to monitor the spread of antibiotic-resistant bacteria and antibiotic resistance genes.}, } @article {pmid37250090, year = {2023}, author = {Zhang, JX and Xu, JH and Yuan, B and Wang, XD and Mao, XH and Wang, JL and Zhang, XL and Yuan, Y}, title = {Detection of Burkholderia pseudomallei with CRISPR-Cas12a based on specific sequence tags.}, journal = {Frontiers in public health}, volume = {11}, number = {}, pages = {1153352}, pmid = {37250090}, issn = {2296-2565}, mesh = {Humans ; *Burkholderia pseudomallei/genetics ; *Melioidosis/diagnosis/genetics/microbiology ; CRISPR-Cas Systems ; }, abstract = {Melioidosis is a bacterial infection caused by Burkholderia pseudomallei (B. pseudomallei), posing a significant threat to public health. Rapid and accurate detection of B. pseudomallei is crucial for preventing and controlling melioidosis. However, identifying B. pseudomallei is challenging due to its high similarity to other species in the same genus. To address this issue, this study proposed a dual-target method that can specifically identify B. pseudomallei in less than 40 min. We analyzed 1722 B. 
pseudomallei genomes to construct large-scale pan-genomes and selected specific sequence tags in their core genomes that effectively distinguish B. pseudomallei from its closely related species. Specifically, we selected two specific tags, LC1 and LC2, which we combined with the Clustered Regularly Interspaced Short Palindromic Repeats (CRISPR)-CRISPR associated proteins (Cas12a) system and recombinase polymerase amplification (RPA) pre-amplification. Our analysis showed that the dual-target RPA-CRISPR/Cas12a assay has a sensitivity of approximately 0.2 copies/reaction and 10 fg genomic DNA for LC1, and 2 copies/reaction and 20 fg genomic DNA for LC2. Additionally, our method can accurately and rapidly detect B. pseudomallei in human blood and moist soil samples using the specific sequence tags mentioned above. In conclusion, the dual-target RPA-CRISPR/Cas12a method is a valuable tool for the rapid and accurate identification of B. pseudomallei in clinical and environmental samples, aiding in the prevention and control of melioidosis.}, } @article {pmid37249320, year = {2023}, author = {}, title = {New Genomic Sequencing Resource Could Improve Care.}, journal = {Cancer discovery}, volume = {13}, number = {7}, pages = {1506-1507}, doi = {10.1158/2159-8290.CD-NB2023-0042}, pmid = {37249320}, issn = {2159-8290}, mesh = {Humans ; Base Sequence ; *Genomics ; }, abstract = {The first draft of a human pangenomic reference, which includes 47 individuals selected to maximize biogeographic diversity, offers a path to more accurate and effective screening for disease. 
This broader and more complete view of genetic diversity could lead to new targets for cancer therapies.}, } @article {pmid37249052, year = {2023}, author = {Chen, Y and Guo, Y and Xie, X and Wang, Z and Miao, L and Yang, Z and Jiao, Y and Xie, C and Liu, J and Hu, Z and Xin, M and Yao, Y and Ni, Z and Sun, Q and Peng, H and Guo, W}, title = {Pangenome-based trajectories of intracellular gene transfers in Poaceae unveil high cumulation in Triticeae.}, journal = {Plant physiology}, volume = {193}, number = {1}, pages = {578-594}, pmid = {37249052}, issn = {1532-2548}, mesh = {*Poaceae/genetics ; Triticum/genetics ; Genome, Plant/genetics ; *Oryza/genetics ; Zea mays/genetics ; Evolution, Molecular ; }, abstract = {Intracellular gene transfers (IGTs) between the nucleus and organelles, including plastids and mitochondria, constantly reshape the nuclear genome during evolution. Despite the substantial contribution of IGTs to genome variation, the dynamic trajectories of IGTs at the pangenomic level remain elusive. Here, we developed an approach, IGTminer, that maps the evolutionary trajectories of IGTs using collinearity and gene reannotation across multiple genome assemblies. We applied IGTminer to create a nuclear organellar gene (NOG) map across 67 genomes covering 15 Poaceae species, including important crops. The resulting NOGs were verified by experiments and sequencing data sets. Our analysis revealed that most NOGs were recently transferred and lineage specific and that Triticeae species tended to have more NOGs than other Poaceae species. Wheat (Triticum aestivum) had a higher retention rate of NOGs than maize (Zea mays) and rice (Oryza sativa), and the retained NOGs were likely involved in photosynthesis and translation pathways. Large numbers of NOG clusters were aggregated in hexaploid wheat during 2 rounds of polyploidization, contributing to the genetic diversity among modern wheat accessions. 
We implemented an interactive web server to facilitate the exploration of NOGs in Poaceae. In summary, this study provides resources and insights into the roles of IGTs in shaping interspecies and intraspecies genome variation and driving plant genome evolution.}, } @article {pmid37246787, year = {2023}, author = {Qian, C and Xu, M and Huang, Z and Tan, M and Fu, C and Zhou, T and Cao, J and Zhou, C}, title = {Complete genome sequence of the emerging pathogen Cysteiniphilum spp. and comparative genomic analysis with genus Francisella: Insights into its genetic diversity and potential virulence traits.}, journal = {Virulence}, volume = {14}, number = {1}, pages = {2214416}, pmid = {37246787}, issn = {2150-5608}, mesh = {Humans ; Virulence/genetics ; *Genome, Bacterial ; *Francisella tularensis/genetics ; Phylogeny ; Genomics ; DNA Transposable Elements ; Genetic Variation ; }, abstract = {Cysteiniphilum is a newly discovered genus in 2017 and is phylogenetically closely related to highly pathogenic Francisella tularensis. Recently, it has become an emerging pathogen in humans. However, the complete genome sequence of genus Cysteiniphilum is lacking, and the genomic characteristics of genetic diversity, evolutionary dynamics, and pathogenicity have not been characterized. In this study, the complete genome of the first reported clinical isolate QT6929 of genus Cysteiniphilum was sequenced, and comparative genomics analyses to Francisella genus were conducted to unveil the genomic landscape and diversity of the genus Cysteiniphilum. Our results showed that the complete genome of QT6929 consists of one 2.61 Mb chromosome and a 76,819 bp plasmid. The calculated average nucleotide identity and DNA-DNA hybridization values revealed that two clinical isolates QT6929 and JM-1 should be reclassified as two novel species in genus Cysteiniphilum. Pan-genome analysis revealed genomic diversity within the genus Cysteiniphilum and an open pan-genome state. 
Genomic plasticity analysis exhibited abundant mobile genetic elements including genome islands, insertion sequences, prophages, and plasmids on Cysteiniphilum genomes, which facilitated the broad exchange of genetic material between Cysteiniphilum and other genera like Francisella and Legionella. Several potential virulence genes associated with lipopolysaccharide/lipooligosaccharide, capsule, and haem biosynthesis specific to clinical isolates were predicted and might contribute to their pathogenicity in humans. Incomplete Francisella pathogenicity island was identified in most Cysteiniphilum genomes. Overall, our study provides an updated phylogenomic relationship of members of the genus Cysteiniphilum and comprehensive genomic insights into this rare emerging pathogen.}, } @article {pmid37243202, year = {2023}, author = {Lobb, B and Shapter, A and Doxey, AC and Nissimov, JI}, title = {Functional Profiling and Evolutionary Analysis of a Marine Microalgal Virus Pangenome.}, journal = {Viruses}, volume = {15}, number = {5}, pages = {}, pmid = {37243202}, issn = {1999-4915}, mesh = {*Phycodnaviridae/genetics ; Genomics ; Phylogeny ; }, abstract = {Phycodnaviridae are large double-stranded DNA viruses, which facilitate studies of host-virus interactions and co-evolution due to their prominence in algal infection and their role in the life cycle of algal blooms. However, the genomic interpretation of these viruses is hampered by a lack of functional information, stemming from the surprising number of hypothetical genes of unknown function. It is also unclear how many of these genes are widely shared within the clade. Using one of the most extensively characterized genera, Coccolithovirus, as a case study, we combined pangenome analysis, multiple functional annotation tools, AlphaFold structural modeling, and literature analysis to compare the core and accessory pangenome and assess support for novel functional predictions. 
We determined that the Coccolithovirus pangenome shares 30% of its genes with all 14 strains, making up the core. Notably, 34% of its genes were found in at most three strains. Core genes were enriched in early expression based on a transcriptomic dataset of Coccolithovirus EhV-201 algal infection, were more likely to be similar to host proteins than the non-core set, and were more likely to be involved in vital functions such as replication, recombination, and repair. In addition, we generated and collated annotations for the EhV representative EhV-86 from 12 different annotation sources, building up information for 142 previously hypothetical and putative membrane proteins. AlphaFold was further able to predict structures for 204 EhV-86 proteins with a modelling accuracy of good-high. These functional clues, combined with generated AlphaFold structures, provide a foundational framework for the future characterization of this model genus (and other giant viruses) and a further look into the evolution of the Coccolithovirus proteome.}, } @article {pmid37240287, year = {2023}, author = {Xia, L and Wang, H and Zhao, X and Obel, HO and Yu, X and Lou, Q and Chen, J and Cheng, C}, title = {Chloroplast Pan-Genomes and Comparative Transcriptomics Reveal Genetic Variation and Temperature Adaptation in the Cucumber.}, journal = {International journal of molecular sciences}, volume = {24}, number = {10}, pages = {}, pmid = {37240287}, issn = {1422-0067}, support = {2021YFD1200200//National Key R&D Program of China/ ; PZCZ201719//Jiangsu Agricultural Innovation of New Cultivars/ ; }, mesh = {Phylogeny ; *Cucumis sativus/genetics ; *Genome, Chloroplast ; Temperature ; Transcriptome ; Chloroplasts/genetics ; Gene Expression Profiling ; Genetic Variation ; }, abstract = {Although whole genome sequencing, genetic variation mapping, and pan-genome studies have been done on a large group of cucumber nuclear genomes, organelle genome information is largely unclear. 
As an important component of the organelle genome, the chloroplast genome is highly conserved, which makes it a useful tool for studying plant phylogeny, crop domestication, and species adaptation. Here, we have constructed the first cucumber chloroplast pan-genome based on 121 cucumber germplasms, and investigated the genetic variations of the cucumber chloroplast genome through comparative genomic, phylogenetic, haplotype, and population genetic structure analysis. Meanwhile, we explored the changes in expression of cucumber chloroplast genes under high- and low-temperature stimulation via transcriptome analysis. As a result, a total of 50 complete chloroplast genomes were successfully assembled from 121 cucumber resequencing data, ranging in size from 156,616-157,641 bp. The 50 cucumber chloroplast genomes have typical quadripartite structures, consisting of a large single copy (LSC, 86,339-86,883 bp), a small single copy (SSC, 18,069-18,363 bp), and two inverted repeats (IRs, 25,166-25,797 bp). Comparative genomic, haplotype, and population genetic structure results showed that there is more genetic variation in Indian ecotype cucumbers compared to other cucumber cultivars, which means that many genetic resources remain to be explored in Indian ecotype cucumbers. Phylogenetic analysis showed that the 50 cucumber germplasms could be classified into 3 types: East Asian, Eurasian + Indian, and Xishuangbanna + Indian. The transcriptomic analysis showed that matK were significantly up-regulated under high- and low-temperature stresses, further demonstrating that cucumber chloroplasts respond to temperature adversity by regulating lipid metabolism and ribosome metabolism. Further, accD has higher editing efficiency under high-temperature stress, which may contribute to the heat tolerance. 
These studies provide useful insight into genetic variation in the chloroplast genome, and established the foundation for exploring the mechanisms of temperature-stimulated chloroplast adaptation.}, } @article {pmid37239397, year = {2023}, author = {Dey, S and Gaur, M and Sykes, EME and Prusty, M and Elangovan, S and Dixit, S and Pati, S and Kumar, A and Subudhi, E}, title = {Unravelling the Evolutionary Dynamics of High-Risk Klebsiella pneumoniae ST147 Clones: Insights from Comparative Pangenome Analysis.}, journal = {Genes}, volume = {14}, number = {5}, pages = {}, pmid = {37239397}, issn = {2073-4425}, mesh = {Humans ; *beta-Lactamases/genetics ; Klebsiella pneumoniae/genetics ; Phylogeny ; Bayes Theorem ; *Klebsiella Infections/epidemiology/genetics/drug therapy ; }, abstract = {BACKGROUND: The high prevalence and rapid emergence of antibiotic resistance in high-risk Klebsiella pneumoniae (KP) ST147 clones is a global health concern and warrants molecular surveillance.

METHODS: A pangenome analysis was performed using publicly available ST147 complete genomes. The characteristics and evolutionary relationships among ST147 members were investigated through a Bayesian phylogenetic analysis.

RESULTS: The large number of accessory genes in the pangenome indicates genome plasticity and openness. Seventy-two antibiotic resistance genes were found to be linked with antibiotic inactivation, efflux, and target alteration. The exclusive detection of the blaOXA-232 gene within the ColKp3 plasmid of KP_SDL79 suggests its acquisition through horizontal gene transfer. The association of seventy-six virulence genes with the acrAB efflux pump, T6SS system and type I secretion system describes its pathogenicity. The presence of Tn6170, a putative Tn7-like transposon in KP_SDL79 with an insertion at the flanking region of the tnsB gene, establishes its transmission ability. The Bayesian phylogenetic analysis estimates ST147's initial divergence in 1951 and the most recent common ancestor for the entire KP population in 1621.

CONCLUSIONS: Present study highlights the genetic diversity and evolutionary dynamics of high-risk clones of K. pneumoniae. Further inter-clonal diversity studies will help us understand its outbreak more precisely and pave the way for therapeutic interventions.}, } @article {pmid37229109, year = {2023}, author = {Jha, UC and Nayyar, H and Chattopadhyay, A and Beena, R and Lone, AA and Naik, YD and Thudi, M and Prasad, PVV and Gupta, S and Dixit, GP and Siddique, KHM}, title = {Major viral diseases in grain legumes: designing disease resistant legumes from plant breeding and OMICS integration.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1183505}, pmid = {37229109}, issn = {1664-462X}, abstract = {Grain legumes play a crucial role in human nutrition and as a staple crop for low-income farmers in developing and underdeveloped nations, contributing to overall food security and agroecosystem services. Viral diseases are major biotic stresses that severely challenge global grain legume production. In this review, we discuss how exploring naturally resistant grain legume genotypes within germplasm, landraces, and crop wild relatives could be used as promising, economically viable, and eco-environmentally friendly solution to reduce yield losses. Studies based on Mendelian and classical genetics have enhanced our understanding of key genetic determinants that govern resistance to various viral diseases in grain legumes. Recent advances in molecular marker technology and genomic resources have enabled us to identify genomic regions controlling viral disease resistance in various grain legumes using techniques such as QTL mapping, genome-wide association studies, whole-genome resequencing, pangenome and 'omics' approaches. These comprehensive genomic resources have expedited the adoption of genomics-assisted breeding for developing virus-resistant grain legumes. 
Concurrently, progress in functional genomics, especially transcriptomics, has helped unravel underlying candidate gene(s) and their roles in viral disease resistance in legumes. This review also examines the progress in genetic engineering-based strategies, including RNA interference, and the potential of synthetic biology techniques, such as synthetic promoters and synthetic transcription factors, for creating viral-resistant grain legumes. It also elaborates on the prospects and limitations of cutting-edge breeding technologies and emerging biotechnological tools (e.g., genomic selection, rapid generation advances, and CRISPR/Cas9-based genome editing tool) in developing virus-disease-resistant grain legumes to ensure global food security.}, } @article {pmid37228750, year = {2023}, author = {Groza, C and Chen, X and Pacis, A and Simon, MM and Pramatarova, A and Aracena, KA and Pastinen, T and Barreiro, LB and Bourque, G}, title = {Genome graphs detect human polymorphisms in active epigenomic state during influenza infection.}, journal = {Cell genomics}, volume = {3}, number = {5}, pages = {100294}, pmid = {37228750}, issn = {2666-979X}, abstract = {Genetic variants, including mobile element insertions (MEIs), are known to impact the epigenome. We hypothesized that genome graphs, which encapsulate genetic diversity, could reveal missing epigenomic signals. To test this, we sequenced the epigenome of monocyte-derived macrophages from 35 ancestrally diverse individuals before and after influenza infection, allowing us to investigate the role of MEIs in immunity. We characterized genetic variants and MEIs using linked reads and built a genome graph. Mapping epigenetic data revealed 2.3%-3% novel peaks for H3K4me1, H3K27ac chromatin immunoprecipitation sequencing (ChIP-seq), and ATAC-seq. Additionally, the use of a genome graph modified some quantitative trait loci estimates and revealed 375 polymorphic MEIs in an active epigenomic state. 
Among these is an AluYh3 polymorphism whose chromatin state changed after infection and was associated with the expression of TRIM25, a gene that restricts influenza RNA synthesis. Our results demonstrate that graph genomes can reveal regulatory regions that would have been overlooked by other approaches.}, } @article {pmid37227251, year = {2023}, author = {Tonkin-Hill, G and Corander, J and Parkhill, J}, title = {Challenges in prokaryote pangenomics.}, journal = {Microbial genomics}, volume = {9}, number = {5}, pages = {}, pmid = {37227251}, issn = {2057-5858}, mesh = {Phylogeny ; *Evolution, Molecular ; *Prokaryotic Cells ; Bacteria/genetics ; Gene Transfer, Horizontal ; }, abstract = {Horizontal gene transfer (HGT) and the resulting patterns of gene gain and loss are a fundamental part of bacterial evolution. Investigating these patterns can help us to understand the role of selection in the evolution of bacterial pangenomes and how bacteria adapt to a new niche. Predicting the presence or absence of genes can be a highly error-prone process that can confound efforts to understand the dynamics of horizontal gene transfer. This review discusses both the challenges in accurately constructing a pangenome and the potential consequences errors can have on downstream analyses. 
We hope that by summarizing these issues researchers will be able to avoid potential pitfalls, leading to improved bacterial pangenome analyses.}, } @article {pmid37224809, year = {2023}, author = {Wisecaver, JH and Auber, RP and Pendleton, AL and Watervoort, NF and Fallon, TR and Riedling, OL and Manning, SR and Moore, BS and Driscoll, WW}, title = {Extreme genome diversity and cryptic speciation in a harmful algal-bloom-forming eukaryote.}, journal = {Current biology : CB}, volume = {33}, number = {11}, pages = {2246-2259.e8}, pmid = {37224809}, issn = {1879-0445}, support = {F32 ES032276/ES/NIEHS NIH HHS/United States ; R21 ES032056/ES/NIEHS NIH HHS/United States ; }, mesh = {Harmful Algal Bloom/physiology ; Phylogeny ; *Haptophyta/genetics ; *Toxins, Biological ; DNA/genetics ; }, abstract = {Harmful algal blooms of the toxic haptophyte Prymnesium parvum are a recurrent problem in many inland and estuarine waters around the world. Strains of P. parvum vary in the toxins they produce and in other physiological traits associated with harmful algal blooms, but the genetic basis for this variation is unknown. To investigate genome diversity in this morphospecies, we generated genome assemblies for 15 phylogenetically and geographically diverse strains of P. parvum, including Hi-C guided, near-chromosome-level assemblies for two strains. Comparative analysis revealed considerable DNA content variation between strains, ranging from 115 to 845 Mbp. Strains included haploids, diploids, and polyploids, but not all differences in DNA content were due to variation in genome copy number. Haploid genome size between strains of different chemotypes differed by as much as 243 Mbp. Syntenic and phylogenetic analyses indicate that UTEX 2797, a common laboratory strain from Texas, is a hybrid that retains two phylogenetically distinct haplotypes. 
Investigation of gene families variably present across the strains identified several functional categories associated with metabolic and genome size variation in P. parvum, including genes for the biosynthesis of toxic metabolites and proliferation of transposable elements. Together, our results indicate that P. parvum comprises multiple cryptic species. These genomes provide a robust phylogenetic and genomic framework for investigations into the eco-physiological consequences of the intra- and inter-specific genetic variation present in P. parvum and demonstrate the need for similar resources for other harmful algal-bloom-forming morphospecies.}, } @article {pmid37222600, year = {2023}, author = {Tchan, BGO and Ngazoa-Kakou, S and Aka, N and Apia, NKB and Hammoudi, N and Drancourt, M and Saad, J}, title = {PPE Barcoding Identifies Biclonal Mycobacterium ulcerans Buruli Ulcer, Côte d'Ivoire.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0034223}, pmid = {37222600}, issn = {2165-0497}, support = {Fondation Méditerranée Infection//Aix-Marseille Université (AMU)/ ; }, mesh = {Humans ; *Buruli Ulcer/microbiology ; *Mycobacterium ulcerans/genetics ; Cote d'Ivoire ; Real-Time Polymerase Chain Reaction ; Personal Protective Equipment ; }, abstract = {Mycobacterium ulcerans, an environmental opportunistic pathogen, causes necrotic cutaneous and subcutaneous lesions, named Buruli ulcers, in tropical countries. PCR-derived tests used to detect M. ulcerans in environmental and clinical samples do not allow one-shot detection, identification, and typing of M. ulcerans among closely related Mycobacterium marinum complex mycobacteria. We established a 385-member M. marinum/M. ulcerans complex whole-genome sequence database by assembling and annotating 341 M. marinum/M. ulcerans complex genomes and added 44 M. marinum/M. ulcerans complex whole-genome sequences already deposited in the NCBI database. 
Pangenome, core genome, and single-nucleotide polymorphism (SNP) distance-based comparisons sorted the 385 strains into 10 M. ulcerans taxa and 13 M. marinum taxa, correlating with the geographic origin of strains. Aligning conserved genes identified one PPE (proline-proline-glutamate) gene sequence to be species and intraspecies specific, thereby genotyping the 23 M. marinum/M. ulcerans complex taxa. PCR sequencing of the PPE gene correctly genotyped nine M. marinum/M. ulcerans complex isolates among one M. marinum taxon and three M. ulcerans taxa in the African taxon (T2.4). Further, successful PPE gene PCR sequencing in 15/21 (71.4%) swabs collected from suspected Buruli ulcer lesions in Côte d'Ivoire exhibited positive M. ulcerans IS2404 real-time PCR and identified the M. ulcerans T2.4.1 genotype in eight swabs and M. ulcerans T2.4.1/T2.4.2 mixed genotypes in seven swabs. PPE gene sequencing could be used as a proxy for whole-genome sequencing for the one-shot detection, identification, and typing of clinical M. ulcerans strains, offering an unprecedented tool for identifying M. ulcerans mixed infections. IMPORTANCE: We describe a new targeted sequencing approach that characterizes the PPE gene to disclose the simultaneous presence of different variants of a single pathogenic microorganism.
This approach has direct implications on the understanding of pathogen diversity and natural history and potential therapeutic implications when dealing with obligate and opportunistic pathogens, such as Mycobacterium ulcerans presented here as a prototype.}, } @article {pmid37221394, year = {2023}, author = {Drott, MT and Park, SC and Wang, YW and Harrow, L and Keller, NP and Pringle, A}, title = {Pangenomics of the death cap mushroom Amanita phalloides, and of Agaricales, reveals dynamic evolution of toxin genes in an invasive range.}, journal = {The ISME journal}, volume = {17}, number = {8}, pages = {1236-1246}, pmid = {37221394}, issn = {1751-7370}, support = {R01 GM112739/GM/NIGMS NIH HHS/United States ; T32 ES007015/ES/NIEHS NIH HHS/United States ; }, mesh = {*Amanita/genetics ; *Agaricales/genetics ; Computational Biology ; }, abstract = {The poisonous European mushroom Amanita phalloides (the "death cap") is invading California. Whether the death caps' toxic secondary metabolites are evolving as it invades is unknown. We developed a bioinformatic pipeline to identify the MSDIN genes underpinning toxicity and probed 88 death cap genomes from an invasive Californian population and from the European range, discovering a previously unsuspected diversity of MSDINs made up of both core and accessory elements. Each death cap individual possesses a unique suite of MSDINs, and toxin genes are significantly differentiated between Californian and European samples. MSDIN genes are maintained by strong natural selection, and chemical profiling confirms MSDIN genes are expressed and result in distinct phenotypes; our chemical profiling also identified a new MSDIN peptide. Toxin genes are physically clustered within genomes. We contextualize our discoveries by probing for MSDINs in genomes from across the order Agaricales, revealing MSDIN diversity originated in independent gene family expansions among genera. 
We also report the discovery of an MSDIN in an Amanita outside the "lethal Amanitas" clade. Finally, the identification of an MSDIN gene and its associated processing gene (POPB) in Clavaria fumosa suggest the origin of MSDINs is older than previously suspected. The dynamic evolution of MSDINs underscores their potential to mediate ecological interactions, implicating MSDINs in the ongoing invasion. Our data change the understanding of the evolutionary history of poisonous mushrooms, emphasizing striking parallels to convergently evolved animal toxins. Our pipeline provides a roadmap for exploring secondary metabolites in other basidiomycetes and will enable drug prospecting.}, } @article {pmid37217946, year = {2023}, author = {Leonard, AS and Crysnanto, D and Mapel, XM and Bhati, M and Pausch, H}, title = {Graph construction method impacts variation representation and analyses in a bovine super-pangenome.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {124}, pmid = {37217946}, issn = {1474-760X}, mesh = {Animals ; *Cattle/genetics ; *Genome ; Minisatellite Repeats ; *Sequence Analysis, DNA/methods ; }, abstract = {BACKGROUND: Several models and algorithms have been proposed to build pangenomes from multiple input assemblies, but their impact on variant representation, and consequently downstream analyses, is largely unknown.

RESULTS: We create multi-species super-pangenomes using pggb, cactus, and minigraph with the Bos taurus taurus reference sequence and eleven haplotype-resolved assemblies from taurine and indicine cattle, bison, yak, and gaur. We recover 221 k nonredundant structural variations (SVs) from the pangenomes, of which 135 k (61%) are common to all three. SVs derived from assembly-based calling show high agreement with the consensus calls from the pangenomes (96%), but validate only a small proportion of variations private to each graph. Pggb and cactus, which also incorporate base-level variation, have approximately 95% exact matches with assembly-derived small variant calls, which significantly improves the edit rate when realigning assemblies compared to minigraph. We use the three pangenomes to investigate 9566 variable number tandem repeats (VNTRs), finding 63% have identical predicted repeat counts in the three graphs, while minigraph can over or underestimate the count given its approximate coordinate system. We examine a highly variable VNTR locus and show that repeat unit copy number impacts the expression of proximal genes and non-coding RNA.

CONCLUSIONS: Our findings indicate good consensus between the three pangenome methods but also show their individual strengths and weaknesses that need to be considered when analysing different types of variants from multiple input assemblies.}, } @article {pmid37217755, year = {2023}, author = {}, title = {Combining reference genomes into a pangenome graph improves accuracy and reduces bias.}, journal = {Nature biotechnology}, volume = {}, number = {}, pages = {}, pmid = {37217755}, issn = {1546-1696}, } @article {pmid37216590, year = {2023}, author = {Geoffroy, V and Lamouche, JB and Guignard, T and Nicaise, S and Kress, A and Scheidecker, S and Le Béchec, A and Muller, J}, title = {The AnnotSV webserver in 2023: updated visualization and ranking.}, journal = {Nucleic acids research}, volume = {51}, number = {W1}, pages = {W39-W45}, pmid = {37216590}, issn = {1362-4962}, mesh = {Humans ; Genome, Human ; High-Throughput Nucleotide Sequencing ; *INDEL Mutation ; *Polymorphism, Single Nucleotide ; Restriction Mapping ; Sequence Analysis, DNA ; Whole Genome Sequencing ; Disease/genetics ; *Software ; }, abstract = {Much of the human genetics variant repertoire is composed of single nucleotide variants (SNV) and small insertion/deletions (indel) but structural variants (SV) remain a major part of our modified DNA. SV detection has often been a complex question to answer either because of the necessity to use different technologies (array CGH, SNP array, Karyotype, Optical Genome Mapping…) to detect each category of SV or to get an appropriate resolution (Whole Genome Sequencing). Thanks to the deluge of pangenomic analysis, Human geneticists are accumulating SV and their interpretation remains time consuming and challenging. 
The AnnotSV webserver (https://www.lbgi.fr/AnnotSV/) aims at being an efficient tool to (i) annotate and interpret SV potential pathogenicity in the context of human diseases, (ii) recognize potential false positive variants from all the SV identified and (iii) visualize the patient variants repertoire. The most recent developments in the AnnotSV webserver are: (i) updated annotations sources and ranking, (ii) three novel output formats to allow diverse utilization (analysis, pipelines), as well as (iii) two novel user interfaces including an interactive circos view.}, } @article {pmid37214944, year = {2023}, author = {Fan, J and Khan, J and Singh, NP and Pibiri, GE and Patro, R}, title = {Fulgor: A fast and compact k-mer index for large-scale matching and color queries.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37214944}, abstract = {The problem of sequence identification or matching - determining the subset of references from a given collection that are likely to contain a query nucleotide sequence - is relevant for many important tasks in Computational Biology, such as metagenomics and pan-genome analysis. Due to the complex nature of such analyses and the large scale of the reference collections a resource efficient solution to this problem is of utmost importance. The reference collection should therefore be pre-processed into an index for fast queries. This poses the threefold challenge of designing an index that is efficient to query, has light memory usage, and scales well to large collections. To solve this problem, we describe how recent advancements in associative, order-preserving, k-mer dictionaries can be combined with a compressed inverted index to implement a fast and compact colored de Bruijn graph data structure. 
This index takes full advantage of the fact that unitigs in the colored de Bruijn graph are monochromatic (all k-mers in a unitig have the same set of references of origin, or "color"), leveraging the order-preserving property of its dictionary. In fact, k-mers are kept in unitig order by the dictionary, thereby allowing for the encoding of the map from k-mers to their inverted lists in as little as 1 + o(1) bits per unitig. Hence, one inverted list per unitig is stored in the index with almost no space/time overhead. By combining this property with simple but effective compression methods for inverted lists, the index achieves very small space. We implement these methods in a tool called Fulgor. Compared to Themisto, the prior state of the art, Fulgor indexes a heterogeneous collection of 30,691 bacterial genomes in 3.8× less space, a collection of 150,000 Salmonella enterica genomes in approximately 2× less space, is at least twice as fast for color queries, and is 2-6× faster to construct.}, } @article {pmid37214799, year = {2023}, author = {Ferrero-Serrano, Á and Chakravorty, D and Kirven, KJ and Assmann, SM}, title = {Oryza CLIMtools: An Online Portal for Investigating Genome-Environment Associations in Rice.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {37214799}, support = {R01 GM126079/GM/NIGMS NIH HHS/United States ; T32 GM102057/GM/NIGMS NIH HHS/United States ; }, abstract = {Elite crop varieties display an evident mismatch between their current distributions and the suitability of the local climate for their productivity. To this end, we present Oryza CLIMtools (https://gramene.org/CLIMtools/oryza_v1.0/), the first resource for pan-genome prediction of climate-associated genetic variants in a crop species.
Oryza CLIMtools consists of interactive web-based databases that allow the user to: i) explore the local environments of traditional rice varieties (landraces) in South-Eastern Asia; and ii) investigate the environment × genome associations for 658 Indica and 283 Japonica rice landrace accessions collected from geo-referenced local environments and included in the 3K Rice Genomes Project. We exemplify the value of these resources, identifying an interplay between flowering time and temperature in the local environment that is facilitated by adaptive natural variation in OsHD2 and disrupted by maladaptive variation in OsSOC1. Prior QTL analysis has suggested the importance of heterotrimeric G proteins in the control of agronomic traits. Accordingly, we analyzed the climate associations of natural variants in the different heterotrimeric G protein subunits. We identified a coordinated role of G proteins in adaptation to the prevailing Potential Evapotranspiration gradient and their regulation of key agronomic traits including plant height and seed and panicle length. We conclude by highlighting the prospect of targeting heterotrimeric G proteins to produce crops that are climate-change-ready.}, } @article {pmid37213867, year = {2023}, author = {Zachariasen, T and Petersen, AØ and Brejnrod, A and Vestergaard, GA and Eklund, A and Nielsen, HB}, title = {Identification of representative species-specific genes for abundance measurements.}, journal = {Bioinformatics advances}, volume = {3}, number = {1}, pages = {vbad060}, pmid = {37213867}, issn = {2635-0041}, abstract = {MOTIVATION: Metagenomic binning facilitates the reconstruction of genomes and identification of Metagenomic Species Pan-genomes or Metagenomic Assembled Genomes. We propose a method for identifying a set of de novo representative genes, termed signature genes, which can be used to measure the relative abundance and used as markers of each metagenomic species with high accuracy.

RESULTS: An initial set of the 100 genes that correlate with the median gene abundance profile of the entity is selected. A variant of the coupon collector's problem was utilized to evaluate the probability of identifying a certain number of unique genes in a sample. This allows us to reject the abundance measurements of strains exhibiting a significantly skewed gene representation. A rank-based negative binomial model is employed to assess the performance of different gene sets across a large set of samples, facilitating identification of an optimal signature gene set for the entity. When we benchmarked the method on a synthetic gene catalog, our optimized signature gene sets estimated relative abundance significantly closer to the true relative abundance compared to the starting gene sets extracted from the metagenomic species. The method was able to replicate results from a study with real data and identify around three times as many metagenomic entities.

The code used for the analysis is available on GitHub: https://github.com/trinezac/SG_optimization.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics Advances online.}, } @article {pmid37213168, year = {2023}, author = {Youngblom, MA and Shockey, AC and Callaghan, MM and Dillard, JP and Pepperell, CS}, title = {The Gonococcal Genetic Island defines distinct sub-populations of Neisseria gonorrhoeae.}, journal = {Microbial genomics}, volume = {9}, number = {5}, pages = {}, pmid = {37213168}, issn = {2057-5858}, support = {R01 AI047958/AI/NIAID NIH HHS/United States ; R01 AI113287/AI/NIAID NIH HHS/United States ; }, mesh = {Humans ; *Neisseria gonorrhoeae/genetics ; DNA ; *Gonorrhea ; Type IV Secretion Systems/genetics ; Genomics ; }, abstract = {The incidence of gonorrhoea is increasing at an alarming pace, and therapeutic options continue to narrow as a result of worsening drug resistance. Neisseria gonorrhoeae is naturally competent, allowing the organism to adapt rapidly to selection pressures including antibiotics. A sub-population of N. gonorrhoeae carries the Gonococcal Genetic Island (GGI), which encodes a type IV secretion system (T4SS) that secretes chromosomal DNA. Previous research has shown that the GGI increases transformation efficiency in vitro, but the extent to which it contributes to horizontal gene transfer (HGT) during infection is unknown. Here we analysed genomic data from clinical isolates of N. gonorrhoeae to better characterize GGI+ and GGI- sub-populations and to delineate patterns of variation at the locus itself. We found the element segregating at an intermediate frequency (61%), and it appears to act as a mobile genetic element with examples of gain, loss, exchange and intra-locus recombination within our sample. We further found evidence suggesting that GGI+ and GGI- sub-populations preferentially inhabit distinct niches with different opportunities for HGT. 
Previously, GGI+ isolates were reported to be associated with more severe clinical infections, and our results suggest this could be related to metal-ion trafficking and biofilm formation. The co-segregation of GGI+ and GGI- isolates despite mobility of the element suggests that both niches inhabited by N. gonorrhoeae remain important to its overall persistence as has been demonstrated previously for cervical- and urethral-adapted sub-populations. These data emphasize the complex population structure of N. gonorrhoeae and its capacity to adapt to diverse niches.}, } @article {pmid37207930, year = {2023}, author = {Qanmber, G and You, Q and Yang, Z and Fan, L and Zhang, Z and Chai, M and Gao, B and Li, F and Yang, Z}, title = {Transcriptional and translational landscape fine-tune genome annotation and explores translation control in cotton.}, journal = {Journal of advanced research}, volume = {}, number = {}, pages = {}, doi = {10.1016/j.jare.2023.05.004}, pmid = {37207930}, issn = {2090-1224}, abstract = {INTRODUCTION: The unavailability of intergenic region annotation in whole genome sequencing and pan-genomics hinders efforts to enhance crop improvement.

OBJECTIVES: Despite advances in research, the impact of post-transcriptional regulation on fiber development and translatome profiling at different stages of fiber growth in cotton (G. hirsutum) remains unexplored.

METHODS: We utilized a combination of reference-guided de novo transcriptome assembly and ribosome profiling techniques to uncover the hidden mechanisms of translational control in eight distinct tissues of upland cotton.

RESULTS: Our study identified P-site distribution at three-nucleotide periodicity and dominant ribosome footprint at 27 nucleotides. Specifically, we have detected 1,589 small open reading frames (sORFs), including 1,376 upstream ORFs (uORFs) and 213 downstream ORFs (dORFs), as well as 552 long non-coding RNAs (lncRNAs) with potential coding functions, which fine-tune the annotation of the cotton genome. Further, we have identified novel genes and lncRNAs with strong translation efficiency (TE), while sORFs were found to affect mRNA transcription levels during fiber elongation. The reliability of these findings was confirmed by the high consistency in correlation and synergetic fold change between RNA-sequencing (RNA-seq) and Ribosome-sequencing (Ribo-seq) analyses. Additionally, integrated omics analysis of the normal fiber ZM24 and short fiber pag1 cotton mutant revealed several differentially expressed genes (DEGs), and fiber-specific expressed (high/low) genes associated with sORFs (uORFs and dORFs). These findings were further supported by the overexpression and knockdown of GhKCS6, a gene associated with sORFs in cotton, and demonstrated the potential regulation of the mechanism governing fiber elongation on both the transcriptional and post-transcriptional levels.

CONCLUSION: Reference-guided transcriptome assembly and the identification of novel transcripts fine-tune the annotation of the cotton genome and predicted the landscape of fiber development. Our approach provided a high-throughput method, based on multi-omics, for discovering unannotated ORFs, hidden translational control, and complex regulatory mechanisms in crop plants.}, } @article {pmid37202927, year = {2023}, author = {Zhang, B and Huang, H and Tibbs-Cortes, LE and Vanous, A and Zhang, Z and Sanguinet, K and Garland-Campbell, KA and Yu, J and Li, X}, title = {Streamline unsupervised machine learning to survey and graph indel-based haplotypes from pan-genomes.}, journal = {Molecular plant}, volume = {16}, number = {6}, pages = {975-978}, doi = {10.1016/j.molp.2023.05.005}, pmid = {37202927}, issn = {1752-9867}, mesh = {*Unsupervised Machine Learning ; Haplotypes/genetics ; *Genome ; Algorithms ; INDEL Mutation/genetics ; }, } @article {pmid37202771, year = {2023}, author = {Ahmed, OY and Rossi, M and Gagie, T and Boucher, C and Langmead, B}, title = {SPUMONI 2: improved classification using a pangenome index of minimizer digests.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {122}, pmid = {37202771}, issn = {1474-760X}, support = {R01HG011392/HG/NHGRI NIH HHS/United States ; }, mesh = {*Algorithms ; *Genomics ; Metagenomics ; Databases, Factual ; Sequence Analysis, DNA ; }, abstract = {Genomics analyses use large reference sequence collections, like pangenomes or taxonomic databases. SPUMONI 2 is an efficient tool for sequence classification of both short and long reads. It performs multi-class classification using a novel sampled document array. By incorporating minimizers, SPUMONI 2's index is 65 times smaller than minimap2's for a mock community pangenome. SPUMONI 2 achieves a speed improvement of 3-fold compared to SPUMONI and 15-fold compared to minimap2. 
We show SPUMONI 2 achieves an advantageous mix of accuracy and efficiency in practical scenarios such as adaptive sampling, contamination detection and multi-class metagenomics classification.}, } @article {pmid37202587, year = {2023}, author = {Anbazhagan, S and Himani, KM and Karthikeyan, R and Prakasan, L and Dinesh, M and Nair, SS and Lalsiamthara, J and Abhishek, and Ramachandra, SG and Chaturvedi, VK and Chaudhuri, P and Thomas, P}, title = {Comparative genomics of Brucella abortus and Brucella melitensis unravels the gene sharing, virulence factors and SNP diversity among the standard, vaccine and field strains.}, journal = {International microbiology : the official journal of the Spanish Society for Microbiology}, volume = {}, number = {}, pages = {}, pmid = {37202587}, issn = {1618-1905}, abstract = {Brucella abortus and Brucella melitensis are the primary etiological agents of brucellosis in large and small ruminants, respectively. There are limited comparative genomic studies involving Brucella strains that explore the relatedness among both species. In this study, we involved strains (n=44) representing standard, vaccine and Indian field origin for pangenome, single nucleotide polymorphism (SNP) and phylogenetic analysis. Both species shared a common gene pool representing 2884 genes out of a total 3244 genes. SNP-based phylogenetic analysis indicated higher SNP diversity among B. melitensis (3824) strains in comparison to B. abortus (540) strains, and a clear demarcation was identified between standard/vaccine and field strains. The analysis for virulence genes revealed that virB3, virB7, ricA, virB5, ipx5, wbkC, wbkB, and acpXL genes were highly conserved in most of the Brucella strains. Interestingly, virB10 gene was found to have high variability among the B. abortus strains. The cgMLST analysis revealed distinct sequence types for the standard/vaccine and field strains. B. 
abortus strains from north-eastern India fall within similar sequence type differing from other strains. In conclusion, the analysis revealed a highly shared core genome among two Brucella species. SNP analysis revealed B. melitensis strains exhibit high diversity as compared to B. abortus strains. Strains with absence or high polymorphism of virulence genes can be exploited for the development of novel vaccine candidates effective against both B. abortus and B. melitensis.}, } @article {pmid37196842, year = {2023}, author = {Tian, R and Xu, S and Li, P and Li, M and Liu, Y and Wang, K and Liu, G and Li, Y and Dai, L and Zhang, W}, title = {Characterization of G-type Clostridium perfringens bacteriophages and their disinfection effect on chicken meat.}, journal = {Anaerobe}, volume = {81}, number = {}, pages = {102736}, doi = {10.1016/j.anaerobe.2023.102736}, pmid = {37196842}, issn = {1095-8274}, mesh = {Animals ; Humans ; Clostridium perfringens/genetics ; *Bacteriophages/genetics ; Chickens ; Disinfection ; Phylogeny ; Anti-Bacterial Agents/pharmacology ; *Clostridium Infections/prevention & control/veterinary ; *Poultry Diseases ; *Enteritis ; Meat ; }, abstract = {OBJECTIVE: Clostridium perfringens is one of most important bacterial pathogens in the poultry industry and mainly causes necrotizing enteritis (NE). This pathogen and its toxins can cause foodborne diseases in humans through the food chain. In China, with the rise of antibiotic resistance and the banning of antibiotic growth promoters (AGPs) in poultry farming, food contamination and NE are becoming more prevalent. Bacteriophages are a viable technique to control C. perfringens as an alternative to antibiotics. We isolated Clostridium phage from the environment, providing a new method for the prevention of NE and C. perfringens contamination in meat.

METHODS: In this study, we selected C. perfringens strains from various regions and animal sources in China for phage isolation. The biological characteristics of Clostridium phage were studied in terms of host range, MOI, one-step curve, temperature and pH stability. We sequenced and annotated the genome of the Clostridium phage and performed phylogenetic and pangenomic analyses. Finally, we studied its antibacterial activity against bacterial culture and its disinfection effect against C. perfringens in meat.

RESULTS: A Clostridium phage, named ZWPH-P21 (P21), was isolated from chicken farm sewage in Jiangsu, China. P21 has been shown to specifically lyse C. perfringens type G. Further analysis of basic biological characteristics showed that P21 was stable under the conditions of pH 4-11 and temperature 4-60 °C, and the optimal multiplicity of infection (MOI) was 0.1. In addition, P21 could form a "halo" on agar plates, suggesting that the phage may encode depolymerase. Genome sequence analysis showed that P21 was the most closely related to Clostridium phage CPAS-15 belonging to the Myoviridae family, with a recognition rate of 97.24% and a query coverage rate of 98%. No virulence factors or drug resistance genes were found in P21. P21 showed promising antibacterial activity in vitro and in chicken disinfection experiments. In conclusion, P21 has the potential to be used for preventing and controlling C. perfringens in chicken food production.}, } @article {pmid37195730, year = {2023}, author = {Tanwar, AS and Shruptha, P and Jnana, A and Brand, A and Ballal, M and Satyamoorthy, K and Murali, TS}, title = {Emerging Pathogens in Planetary Health and Lessons from Comparative Genome Analyses of Three Clostridia Species.}, journal = {Omics : a journal of integrative biology}, volume = {27}, number = {6}, pages = {247-259}, doi = {10.1089/omi.2023.0034}, pmid = {37195730}, issn = {1557-8100}, mesh = {Aged ; Humans ; *Clostridioides difficile/genetics ; Proteomics ; *Clostridium Infections ; Virulence/genetics ; Genomics ; }, abstract = {Clostridioides difficile (CD) is a major planetary health burden. A Gram-positive opportunistic pathogen, CD, colonizes the large intestine and is implicated in sepsis, pseudomembranous colitis, and colorectal cancer. C. difficile infection typically following antibiotic exposure results in dysbiosis of the gut microbiome, and is one of the leading causes of diarrhea in the elderly population. 
While several studies have focused on the toxigenic strains of CD, gut commensals such as Clostridium butyricum (CB) and Clostridium tertium (CT) could harbor toxin/virulence genes, and thus pose a threat to human health. In this study, we sequenced and characterized three isolates, namely, CT (MALS001), CB (MALS002), and CD (MALS003) for their antimicrobial, cytotoxic, antiproliferative, genomic, and proteomic profiles. Although in vitro cytotoxic and antiproliferative potential were observed predominantly in CD MALS003, genome analysis revealed pathogenic potential of CB MALS002 and CT MALS001. Pangenome analysis revealed the presence of several accessory genes typically involved in fitness, virulence, and resistance characteristics in the core genomes of sequenced strains. The presence of an array of virulence and antimicrobial resistance genes in CB MALS002 and CT MALS001 suggests their potential role as emerging pathogens with significant impact on planetary health.}, } @article {pmid37195188, year = {2023}, author = {Murik, O and Zeevi, DA and Mann, T and Kashat, L and Assous, MV and Megged, O and Yagupsky, P}, title = {Whole-Genome Sequencing Reveals Differences among Kingella kingae Strains from Carriers and Patients with Invasive Infections.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0389522}, pmid = {37195188}, issn = {2165-0497}, mesh = {Humans ; Child, Preschool ; *Kingella kingae/genetics ; Virulence/genetics ; Virulence Factors/genetics ; *Endocarditis ; *Bacteremia/pathology ; }, abstract = {As a result of the increasing use of sensitive nucleic acid amplification tests, Kingella kingae is being recognized as a common pathogen of early childhood, causing medical conditions ranging from asymptomatic oropharyngeal colonization to bacteremia, osteoarthritis, and life-threatening endocarditis. However, the genomic determinants associated with the different clinical outcomes are unknown. 
Employing whole-genome sequencing, we studied 125 international K. kingae isolates derived from 23 healthy carriers and 102 patients with invasive infections, including bacteremia (n = 23), osteoarthritis (n = 61), and endocarditis (n = 18). We compared their genomic structures and contents to identify genomic determinants associated with the different clinical conditions. The mean genome size of the strains was 2,024,228 bp, and the pangenome comprised 4,026 predicted genes, of which 1,460 (36.3%) were core genes shared by >99% of the isolates. No single gene discriminated between carried and invasive strains; however, 43 genes were significantly more frequent in invasive isolates, compared to asymptomatically carried organisms, and a few showed a significant differential distribution among isolates from skeletal system infections, bacteremia, and endocarditis. The gene encoding the iron-regulated protein FrpC was uniformly absent in all 18 endocarditis-associated strains but was present in one-third of other invasive isolates. Similar to other members of the Neisseriaceae family, the K. kingae differences in invasiveness and tropism for specific body tissues appear to depend on combinations of multiple virulence-associated determinants that are widely distributed throughout the genome. The potential role of the absence of the FrpC protein in the pathogenesis of endocardial invasion deserves further investigation. IMPORTANCE The wide range of clinical severities exhibited by invasive Kingella kingae infections strongly suggests that isolates differ in their genomic contents, and strains associated with life-threatening endocarditis may harbor distinct genomic determinants that result in cardiac tropism and severe tissue damage. The results of the present study show that no single gene discriminated between asymptomatically carried isolates and invasive strains. 
However, 43 putative genes were significantly more frequent among invasive isolates than among pharyngeal colonizers. In addition, several genes displayed a significant differential distribution among isolates from bacteremia, skeletal system infections, and endocarditis, suggesting that the virulence and tissue tropism of K. kingae are multifactorial and polygenic, depending on changes in the allele content and genomic organization. Further analysis of these putative genes may identify genomic determinants of the invasiveness of K. kingae and its affinity for specific body tissues and potential targets for a future protective vaccine.}, } @article {pmid37193328, year = {2023}, author = {Kalaivanan, NS and Ghoshal, T and Lakshmi, MA and Mondal, KK and Kulshreshtha, A and Singh, KBM and Thakur, JK and Supriya, P and Bhatnagar, S and Mani, C}, title = {Complete genome resource unravels the close relation of an Indian Xanthomonas oryzae pv. oryzae strain IXOBB0003 with Philippines strain causing bacterial blight of rice.}, journal = {3 Biotech}, volume = {13}, number = {6}, pages = {187}, pmid = {37193328}, issn = {2190-572X}, abstract = {UNLABELLED: Xanthomonas oryzae pv. oryzae (Xoo) is a pathogen of concern for rice growers as it limits the production potential of rice varieties worldwide. Due to its high genomic plasticity, the pathogen continues to evolve, nullifying the deployed resistance mechanisms. It is pertinent to monitor the evolving Xoo population for the virulent novel strains, and the affordable sequencing technologies made the task feasible with an in-depth understanding of their pathogenesis arsenals. We present the complete genome of a highly virulent Indian Xoo strain IXOBB0003, predominantly found in northwestern parts of India, by employing next-generation sequencing and single-molecule sequencing in real-time technologies. The final genome assembly comprises 4,962,427 bp and has 63.96% GC content. 
The pan genome analysis reveals that strain IXOBB0003 houses a total of 3655 core genes, 1276 accessory genes and 595 unique genes. Comparative analysis of the predicted gene clusters of coding sequences and protein count of strain IXOBB0003 depicts 3687 of almost 90% gene clusters shared by other Asian strains, 17 unique to IXOBB0003 and 139 CDSs of IXOBB0003 are shared with PXO99[A]. AnnoTALE-based studies revealed 16 TALEs conferred from the whole genome sequence. Prominent TALEs of our strain are found orthologous to TALEs of the Philippines strain PXO99[A]. The genomic features of Indian Xoo strain IXOBB0003 and their comparison with other Asian strains would certainly contribute significantly while formulating novel strategies for BB management.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-023-03596-x.}, } @article {pmid37192177, year = {2023}, author = {Price, RJ and Davik, J and Fernandéz Fernandéz, F and Bates, HJ and Lynn, S and Nellist, CF and Buti, M and Røen, D and Šurbanovski, N and Alsheikh, M and Harrison, RJ and Sargent, DJ}, title = {Chromosome-scale genome sequence assemblies of the 'Autumn Bliss' and 'Malling Jewel' cultivars of the highly heterozygous red raspberry (Rubus idaeus L.) derived from long-read Oxford Nanopore sequence data.}, journal = {PloS one}, volume = {18}, number = {5}, pages = {e0285756}, pmid = {37192177}, issn = {1932-6203}, mesh = {*Rubus/genetics ; *Nanopores ; Genome ; Genomics ; Sequence Analysis, DNA ; Centromere ; }, abstract = {Red raspberry (Rubus idaeus L.) is an economically valuable soft-fruit species with a relatively small (~300 Mb) but highly heterozygous diploid (2n = 2x = 14) genome. Chromosome-scale genome sequences are a vital tool in unravelling the genetic complexity controlling traits of interest in crop plants such as red raspberry, as well as for functional genomics, evolutionary studies, and pan-genomics diversity studies. In this study, we developed genome sequences of a primocane fruiting variety ('Autumn Bliss') and a floricane variety ('Malling Jewel'). The use of long-read Oxford Nanopore Technologies sequencing data yielded long read lengths that permitted well resolved genome sequences for the two cultivars to be assembled. The de novo assemblies of 'Malling Jewel' and 'Autumn Bliss' contained 79 and 136 contigs respectively, and 263.0 Mb of the 'Autumn Bliss' and 265.5 Mb of the 'Malling Jewel' assembly could be anchored unambiguously to a previously published red raspberry genome sequence of the cultivar 'Anitra'. 
Single copy ortholog analysis (BUSCO) revealed high levels of completeness in both genomes sequenced, with 97.4% of sequences identified in 'Autumn Bliss' and 97.7% in 'Malling Jewel'. The density of repetitive sequence contained in the 'Autumn Bliss' and 'Malling Jewel' assemblies was significantly higher than in the previously published assembly and centromeric and telomeric regions were identified in both assemblies. A total of 42,823 protein coding regions were identified in the 'Autumn Bliss' assembly, whilst 43,027 were identified in the 'Malling Jewel' assembly. These chromosome-scale genome sequences represent an excellent genomics resource for red raspberry, particularly around the highly repetitive centromeric and telomeric regions of the genome that are less complete in the previously published 'Anitra' genome sequence.}, } @article {pmid37186225, year = {2023}, author = {Kuzmanović, N and diCenzo, GC and Bunk, B and Spröer, C and Frühling, A and Neumann-Schaal, M and Overmann, J and Smalla, K}, title = {Genomics of the "tumorigenes" clade of the family Rhizobiaceae and description of Rhizobium rhododendri sp. nov.}, journal = {MicrobiologyOpen}, volume = {12}, number = {2}, pages = {e1352}, pmid = {37186225}, issn = {2045-8827}, mesh = {Phylogeny ; DNA, Bacterial/genetics ; *Rhizobiaceae ; *Rhizobium/genetics ; Agrobacterium/genetics ; Genomics ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Fatty Acids ; Bacterial Typing Techniques ; }, abstract = {Tumorigenic members of the family Rhizobiaceae, known as agrobacteria, are responsible for crown and cane gall diseases of various crops worldwide. Tumorigenic agrobacteria are commonly found in the genera Agrobacterium, Allorhizobium, and Rhizobium. In this study, we analyzed a distinct "tumorigenes" clade of the genus Rhizobium, which includes the tumorigenic species Rhizobium tumorigenes, as well as strains causing crown gall disease on rhododendron. 
Here, high-quality, closed genomes of representatives of the "tumorigenes" clade were generated, followed by comparative genomic and phylogenomic analyses. Additionally, the phenotypic characteristics of representatives of the "tumorigenes" clade were analyzed. Our results showed that the tumorigenic strains isolated from rhododendron represent a novel species of the genus Rhizobium for which the name Rhizobium rhododendri sp. nov. is proposed. This species also includes additional strains originating from blueberry and Himalayan blackberry in the United States, whose genome sequences were retrieved from GenBank. Both R. tumorigenes and R. rhododendri contain multipartite genomes, including a chromosome, putative chromids, and megaplasmids. Synteny and phylogenetic analyses indicated that a large putative chromid of R. rhododendri resulted from the cointegration of an ancestral megaplasmid and two putative chromids, following its divergence from R. tumorigenes. Moreover, gene clusters specific for both species of the "tumorigenes" clade were identified, and their biological functions and roles in the ecological diversification of R. rhododendri and R. tumorigenes were predicted and discussed.}, } @article {pmid37180381, year = {2023}, author = {Pham, HH and Kim, DH and Nguyen, TL}, title = {Wide-genome selection of lactic acid bacteria harboring genes that promote the elimination of antinutritional factors.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1145041}, pmid = {37180381}, issn = {1664-462X}, abstract = {Anti-nutritional factors (ANFs) substances in plant products, such as indigestible non-starchy polysaccharides (α-galactooligosaccharides, α-GOS), phytate, tannins, and alkaloids can impede the absorption of many critical nutrients and cause major physiological disorders. To enhance silage quality and its tolerance threshold for humans as well as other animals, ANFs must be reduced. 
This study aims to identify and compare the bacterial species/strains that are of potential use for industrial fermentation and ANFs reduction. A pan-genome study of 351 bacterial genomes was performed, and binary data was processed to quantify the number of genes involved in the removal of ANFs. Among the four pan-genome analyses, all 37 tested Bacillus subtilis genomes had one phytate degradation gene, while 91 out of 150 Enterobacteriaceae genomes harbor at least one gene (maximum three). Although no gene encoding phytase was detected in genomes of Lactobacillus and Pediococcus species, they have genes involved indirectly in metabolism of phytate-derivatives to produce Myo-inositol, an important compound in animal cell physiology. In contrast, genes related to production of lectin, tannase and saponin degrading enzyme were not found in genomes of B. subtilis and Pediococcus species. Our findings suggest a combination of bacterial species and/or unique strains in fermentation, for example, two Lactobacillus strains (DSM 21115 and ATCC 14869) with B. subtilis SRCM103689, would maximize the efficiency in reducing the ANFs concentration. In conclusion, this study provides insights into bacterial genomes analysis for maximizing nutritional value in plant-based food. 
Further investigations of gene numbers and repertories correlated to metabolism of different ANFs will help clarifying the efficiency of time consuming and food qualities.}, } @article {pmid37180261, year = {2023}, author = {Meng, X and Chen, F and Xiong, M and Hao, H and Wang, KJ}, title = {A new pathogenic isolate of Kocuria kristinae identified for the first time in the marine fish Larimichthys crocea.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1129568}, pmid = {37180261}, issn = {1664-302X}, abstract = {In recent years, new emerging pathogenic microorganisms have frequently appeared in animals, including marine fish, possibly due to climate change, anthropogenic activities, and even cross-species transmission of pathogenic microorganisms among animals or between animals and humans, which poses a serious issue for preventive medicine. In this study, a bacterium was clearly characterized among 64 isolates from the gills of diseased large yellow croaker Larimichthys crocea that were raised in marine aquaculture. This strain was identified as K. kristinae by biochemical tests with a VITEK 2.0 analysis system and 16S rRNA sequencing and named K. kristinae_LC. The potential genes that might encode virulence-factors were widely screened through sequence analysis of the whole genome of K. kristinae_LC. Many genes involved in the two-component system and drug-resistance were also annotated. In addition, 104 unique genes in K. kristinae_LC were identified by pan genome analysis with the genomes of this strain from five different origins (woodpecker, medical resource, environment, and marine sponge reef) and the analysis results demonstrated that their predicted functions might be associated with adaptation to living conditions such as higher salinity, complex marine biomes, and low temperature. A significant difference in genomic organization was found among the K. 
kristinae strains that might be related to their hosts living in different environments. The animal regression test for this new bacterial isolate was carried out using L. crocea, and the results showed that this bacterium could cause the death of L. crocea and that the fish mortality was dose-dependent within 5 days post infection, indicating the pathogenicity of K. kristinae_LC to marine fish. Since K. kristinae has been reported as a pathogen for humans and bovines, in our study, we revealed a new isolate of K. kristinae_LC from marine fish for the first time, suggesting the potentiality of cross-species transmission among animals or from marine animals to humans, from which we would gain insight to help in future public prevention strategies for new emerging pathogens.}, } @article {pmid37175750, year = {2023}, author = {An, B and Cai, H and Li, B and Zhang, S and He, Y and Wang, R and Jiao, C and Guo, Y and Xu, L and Xu, Y}, title = {Molecular Evolution of Histone Methylation Modification Families in the Plant Kingdom and Their Genome-Wide Analysis in Barley.}, journal = {International journal of molecular sciences}, volume = {24}, number = {9}, pages = {}, pmid = {37175750}, issn = {1422-0067}, support = {2021CFA064//Hubei Outstanding Youth Fund/ ; 2021BBA225//Hubei Key Research and Development Program/ ; 2021-620-000-001-01//Hubei Agricultural Science and Tech-nology Innovation Center Innovation Team Project/ ; }, mesh = {Humans ; *Hordeum/metabolism ; Histones/genetics/metabolism ; Methylation ; Plants/metabolism ; Phylogeny ; Evolution, Molecular ; Gene Expression Regulation, Plant ; Plant Proteins/genetics/metabolism ; Genome, Plant ; }, abstract = {In this study, based on the OneKP database and through comparative genetic analysis, we found that HMT and HDM may originate from Chromista and are highly conserved in green plants, and that during the evolution from algae to land plants, histone methylation modifications gradually became complex and diverse, 
which is more conducive to the adaptation of plants to complex and variable environments. We also characterized the number of members, genetic similarity, and phylogeny of HMT and HDM families in barley using the barley pangenome and the Tibetan Lasa Goumang genome. The results showed that HMT and HDM were highly conserved in the domestication of barley, but there were some differences in the Lasa Goumang SDG subfamily. Expression analysis showed that HvHMTs and HvHDMs were highly expressed in specific tissues and had complex expression patterns under multiple stress treatments. In summary, the amplification and variation of HMT and HDM facilitate plant adaptation to complex terrestrial environments, while they are highly conserved in barley and play an important role in barley growth and development with abiotic stresses. In brief, our findings provide a novel perspective on the origin and evolutionary history of plant HvHMTs and HvHDMs, and lay a foundation for further investigation of their functions in barley.}, } @article {pmid37173388, year = {2023}, author = {Abdella, B and Abozahra, NA and Shokrak, NM and Mohamed, RA and El-Helow, ER}, title = {Whole spectrum of Aeromonas hydrophila virulence determinants and the identification of novel SNPs using comparative pathogenomics.}, journal = {Scientific reports}, volume = {13}, number = {1}, pages = {7712}, pmid = {37173388}, issn = {2045-2322}, mesh = {Animals ; Humans ; Aeromonas hydrophila ; Virulence/genetics ; Polymorphism, Single Nucleotide ; Virulence Factors/genetics ; *Aeromonas ; Fishes ; *Fish Diseases/microbiology ; *Gram-Negative Bacterial Infections/veterinary/microbiology ; }, abstract = {Aeromonas hydrophila is a ubiquitous fish pathogen and an opportunistic human pathogen. It is mostly found in aquatic habitats, but it has also been isolated from food and bottled mineral waters. 
It causes hemorrhagic septicemia, ulcerative disease, and motile Aeromonas septicemia (MAS) in fish and other aquatic animals. Moreover, it might cause gastroenteritis, wound infections, and septicemia in humans. Different variables influence A. hydrophila virulence, including the virulence genes expressed, host susceptibility, and environmental stresses. The identification of virulence factors for a bacterial pathogen will help in the development of preventive and control measures. 95 Aeromonas spp. genomes were examined in the current study, and 53 strains were determined to be valid A. hydrophila. These genomes were examined for pan- and core-genomes using a comparative genomics technique. A. hydrophila has an open pan-genome with 18,306 total genes and 1620 genes in its core-genome. In the pan-genome, 312 virulence genes have been detected. The effector delivery system category had the largest number of virulence genes (87), followed by immunological modulation and motility genes (69 and 46, respectively). This provides new insight into the pathogenicity of A. hydrophila. In the pan-genome, a few distinctive single-nucleotide polymorphisms (SNPs) have been identified in four genes, namely: D-glycero-beta-D-manno-heptose-1,7-bisphosphate 7-phosphatase, chemoreceptor glutamine deamidase, Spermidine N (1)-acetyltransferase, and maleylpyruvate isomerase, which are present in all A. hydrophila genomes, which make them molecular marker candidates for precise identification of A. hydrophila. 
Therefore, for precise diagnostic and discrimination results, we suggest these genes be considered when designing primers and probes for sequencing, multiplex-PCR, or real-time PCR.}, } @article {pmid37173271, year = {2023}, author = {Raza, A and Bohra, A and Varshney, RK}, title = {Pan-genome for pearl millet that beats the heat.}, journal = {Trends in plant science}, volume = {28}, number = {8}, pages = {857-860}, doi = {10.1016/j.tplants.2023.04.016}, pmid = {37173271}, issn = {1878-4372}, mesh = {*Pennisetum/genetics ; Hot Temperature ; Plant Breeding ; }, abstract = {A better understanding of crop genomes reveals that structural variations (SVs) are crucial for genetic improvement. A graph-based pan-genome by Yan et al. uncovered 424 085 genomic SVs and provided novel insights into heat tolerance of pearl millet. We discuss how these SVs can fast-track pearl millet breeding under harsh environments.}, } @article {pmid37171844, year = {2023}, author = {Büchler, T and Olbrich, J and Ohlebusch, E}, title = {Efficient short read mapping to a pangenome that is represented by a graph of ED strings.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {5}, pages = {}, pmid = {37171844}, issn = {1367-4811}, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Software ; *Genome, Human ; Algorithms ; }, abstract = {MOTIVATION: A pangenome represents many diverse genome sequences of the same species. In order to cope with small variations as well as structural variations, recent research focused on the development of graph-based models of pangenomes. Mapping is the process of finding the original location of a DNA read in a reference sequence, typically a genome. Using a pangenome instead of a (linear) reference genome can, e.g. reduce mapping bias, the tendency to incorrectly map sequences that differ from the reference genome. Mapping reads to a graph, however, is more complex and needs more resources than mapping to a reference genome. 
Reducing the complexity of the graph by encoding simple variations like SNPs in a simple way can accelerate read mapping and reduce the memory requirements at the same time.

RESULTS: We introduce graphs based on elastic-degenerate strings (ED strings, EDS) and the linearized form of these EDS graphs as a new representation for pangenomes. In this representation, small variations are encoded directly in the sequence. Structural variations are encoded in a graph structure. This reduces the size of the representation in comparison to sequence graphs. In the linearized form, mapping techniques that are known from ordinary strings can be applied with appropriate adjustments. Since most variations are expressed directly in the sequence, the mapping process rarely has to take edges of the EDS graph into account. We developed a prototypical software tool GED-MAP that uses this representation together with a minimizer index to map short reads to the pangenome. Our experiments show that the new method works on a whole human genome scale, taking structural variants properly into account. The advantage of GED-MAP, compared with other pangenomic short read mappers, is that the new representation allows for a simple indexing method. This makes GED-MAP fast and memory efficient.

Sources are available at: https://github.com/thomas-buechler-ulm/gedmap.}, } @article {pmid37167256, year = {2023}, author = {Riborg, A and Gulla, S and Fiskebeck, EZ and Ryder, D and Verner-Jeffreys, DW and Colquhoun, DJ and Welch, TJ}, title = {Pan-genome survey of the fish pathogen Yersinia ruckeri links accessory- and amplified genes to virulence.}, journal = {PloS one}, volume = {18}, number = {5}, pages = {e0285257}, pmid = {37167256}, issn = {1932-6203}, mesh = {Animals ; Yersinia ruckeri/genetics ; Virulence/genetics ; *Yersinia Infections ; Serogroup ; *Oncorhynchus mykiss ; *Fish Diseases ; }, abstract = {While both virulent and putatively avirulent Yersinia ruckeri strains exist in aquaculture environments, the relationship between the distribution of virulence-associated factors and de facto pathogenicity in fish remains poorly understood. Pan-genome analysis of 18 complete genomes, representing established virulent and putatively avirulent lineages of Y. ruckeri, revealed the presence of a number of accessory genetic determinants. Further investigation of 68 draft genome assemblies revealed that the distribution of certain putative virulence factors correlated well with virulence and host-specificity. The inverse-autotransporter invasin locus yrIlm was, however, the only gene present in all virulent strains, while absent in lineages regarded as avirulent. Strains known to be associated with significant mortalities in salmonid aquaculture display a combination of serotype O1-LPS and yrIlm, with the well-documented highly virulent lineages, represented by MLVA clonal complexes 1 and 2, displaying duplication of the yrIlm locus. 
Duplication of the yrIlm locus was further found to have evolved over time in clonal complex 1, where some modern, highly virulent isolates display up to three copies.}, } @article {pmid37165242, year = {2023}, author = {Liao, WW and Asri, M and Ebler, J and Doerr, D and Haukness, M and Hickey, G and Lu, S and Lucas, JK and Monlong, J and Abel, HJ and Buonaiuto, S and Chang, XH and Cheng, H and Chu, J and Colonna, V and Eizenga, JM and Feng, X and Fischer, C and Fulton, RS and Garg, S and Groza, C and Guarracino, A and Harvey, WT and Heumos, S and Howe, K and Jain, M and Lu, TY and Markello, C and Martin, FJ and Mitchell, MW and Munson, KM and Mwaniki, MN and Novak, AM and Olsen, HE and Pesout, T and Porubsky, D and Prins, P and Sibbesen, JA and Sirén, J and Tomlinson, C and Villani, F and Vollger, MR and Antonacci-Fulton, LL and Baid, G and Baker, CA and Belyaeva, A and Billis, K and Carroll, A and Chang, PC and Cody, S and Cook, DE and Cook-Deegan, RM and Cornejo, OE and Diekhans, M and Ebert, P and Fairley, S and Fedrigo, O and Felsenfeld, AL and Formenti, G and Frankish, A and Gao, Y and Garrison, NA and Giron, CG and Green, RE and Haggerty, L and Hoekzema, K and Hourlier, T and Ji, HP and Kenny, EE and Koenig, BA and Kolesnikov, A and Korbel, JO and Kordosky, J and Koren, S and Lee, H and Lewis, AP and Magalhães, H and Marco-Sola, S and Marijon, P and McCartney, A and McDaniel, J and Mountcastle, J and Nattestad, M and Nurk, S and Olson, ND and Popejoy, AB and Puiu, D and Rautiainen, M and Regier, AA and Rhie, A and Sacco, S and Sanders, AD and Schneider, VA and Schultz, BI and Shafin, K and Smith, MW and Sofia, HJ and Abou Tayoun, AN and Thibaud-Nissen, F and Tricomi, FF and Wagner, J and Walenz, B and Wood, JMD and Zimin, AV and Bourque, G and Chaisson, MJP and Flicek, P and Phillippy, AM and Zook, JM and Eichler, EE and Haussler, D and Wang, T and Jarvis, ED and Miga, KH and Garrison, E and Marschall, T and Hall, IM and Li, H and Paten, B}, title = {A draft 
human pangenome reference.}, journal = {Nature}, volume = {617}, number = {7960}, pages = {312-324}, pmid = {37165242}, issn = {1476-4687}, support = {U41 HG010972/HG/NHGRI NIH HHS/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; U24 HG007497/HG/NHGRI NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; Diploidy ; *Genome, Human/genetics ; Haplotypes/genetics ; Sequence Analysis, DNA ; *Genomics/standards ; Reference Standards ; Cohort Studies ; Alleles ; Genetic Variation ; }, abstract = {Here the Human Pangenome Reference Consortium presents a first draft of the human pangenome reference. The pangenome contains 47 phased, diploid assemblies from a cohort of genetically diverse individuals[1]. These assemblies cover more than 99% of the expected sequence in each genome and are more than 99% accurate at the structural and base pair levels. Based on alignments of the assemblies, we generate a draft pangenome that captures known variants and haplotypes and reveals new alleles at structurally complex loci. We also add 119 million base pairs of euchromatic polymorphic sequences and 1,115 gene duplications relative to the existing reference GRCh38. Roughly 90 million of the additional base pairs are derived from structural variation. 
Using our draft pangenome to analyse short-read data reduced small variant discovery errors by 34% and increased the number of structural variants detected per haplotype by 104% compared with GRCh38-based workflows, which enabled the typing of the vast majority of structural variant alleles per sample.}, } @article {pmid37165241, year = {2023}, author = {Guarracino, A and Buonaiuto, S and de Lima, LG and Potapova, T and Rhie, A and Koren, S and Rubinstein, B and Fischer, C and {Human Pangenome Reference Consortium} and Gerton, JL and Phillippy, AM and Colonna, V and Garrison, E}, title = {Recombination between heterologous human acrocentric chromosomes.}, journal = {Nature}, volume = {617}, number = {7960}, pages = {335-343}, pmid = {37165241}, issn = {1476-4687}, support = {R01 CA266339/CA/NCI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Centromere/genetics ; *Chromosomes, Human/genetics ; DNA, Ribosomal/genetics ; *Recombination, Genetic/genetics ; Translocation, Genetic/genetics ; Cytogenetics ; Telomere/genetics ; }, abstract = {The short arms of the human acrocentric chromosomes 13, 14, 15, 21 and 22 (SAACs) share large homologous regions, including ribosomal DNA repeats and extended segmental duplications[1,2]. Although the resolution of these regions in the first complete assembly of a human genome—the Telomere-to-Telomere Consortium's CHM13 assembly (T2T-CHM13)—provided a model of their homology[3], it remained unclear whether these patterns were ancestral or maintained by ongoing recombination exchange. Here we show that acrocentric chromosomes contain pseudo-homologous regions (PHRs) indicative of recombination between non-homologous sequences. Utilizing an all-to-all comparison of the human pangenome from the Human Pangenome Reference Consortium[4] (HPRC), we find that contigs from all of the SAACs form a community. 
A variation graph[5] constructed from centromere-spanning acrocentric contigs indicates the presence of regions in which most contigs appear nearly identical between heterologous acrocentric chromosomes in T2T-CHM13. Except on chromosome 15, we observe faster decay of linkage disequilibrium in the pseudo-homologous regions than in the corresponding short and long arms, indicating higher rates of recombination[6,7]. The pseudo-homologous regions include sequences that have previously been shown to lie at the breakpoint of Robertsonian translocations[8], and their arrangement is compatible with crossover in inverted duplications on chromosomes 13, 14 and 21. The ubiquity of signals of recombination between heterologous acrocentric chromosomes seen in the HPRC draft pangenome suggests that these shared sequences form the basis for recurrent Robertsonian translocations, providing sequence and population-based confirmation of hypotheses first developed from cytogenetic studies 50 years ago[9].}, } @article {pmid37165237, year = {2023}, author = {Vollger, MR and Dishuck, PC and Harvey, WT and DeWitt, WS and Guitart, X and Goldberg, ME and Rozanski, AN and Lucas, J and Asri, M and {Human Pangenome Reference Consortium} and Munson, KM and Lewis, AP and Hoekzema, K and Logsdon, GA and Porubsky, D and Paten, B and Harris, K and Hsieh, P and Eichler, EE}, title = {Increased mutation and gene conversion within human segmental duplications.}, journal = {Nature}, volume = {617}, number = {7960}, pages = {325-334}, pmid = {37165237}, issn = {1476-4687}, support = {R01 HG002385/HG/NHGRI NIH HHS/United States ; R35 GM133428/GM/NIGMS NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Gene Conversion/genetics ; Genome, Human/genetics ; *Mutation ; *Segmental Duplications, Genomic ; Polymorphism, Single Nucleotide/genetics ; Haplotypes/genetics ; Exons/genetics ; Cytosine/chemistry ; Guanine/chemistry ; CpG Islands/genetics ; }, 
abstract = {Single-nucleotide variants (SNVs) in segmental duplications (SDs) have not been systematically assessed because of the limitations of mapping short-read sequencing data[1,2]. Here we constructed 1:1 unambiguous alignments spanning high-identity SDs across 102 human haplotypes and compared the pattern of SNVs between unique and duplicated regions[3,4]. We find that human SNVs are elevated 60% in SDs compared to unique regions and estimate that at least 23% of this increase is due to interlocus gene conversion (IGC) with up to 4.3 megabase pairs of SD sequence converted on average per human haplotype. We develop a genome-wide map of IGC donors and acceptors, including 498 acceptor and 454 donor hotspots affecting the exons of about 800 protein-coding genes. These include 171 genes that have 'relocated' on average 1.61 megabase pairs in a subset of human haplotypes. Using a coalescent framework, we show that SD regions are slightly evolutionarily older when compared to unique sequences, probably owing to IGC. SNVs in SDs, however, show a distinct mutational spectrum: a 27.1% increase in transversions that convert cytosine to guanine or the reverse across all triplet contexts and a 7.6% reduction in the frequency of CpG-associated mutations when compared to unique DNA. 
We reason that these distinct mutational properties help to maintain an overall higher GC content of SD DNA compared to that of unique DNA, probably driven by GC-biased conversion between paralogous sequences[5,6].}, } @article {pmid37165235, year = {2023}, author = {Massarat, A and Gymrek, M and McStay, B and Jónsson, H}, title = {Human pangenome supports analysis of complex genomic regions.}, journal = {Nature}, volume = {617}, number = {7960}, pages = {256-258}, pmid = {37165235}, issn = {1476-4687}, mesh = {Humans ; *Genomics ; *Genome ; }, } @article {pmid37165229, year = {2023}, author = {Liverpool, L}, title = {First human 'pangenome' aims to catalogue genetic diversity.}, journal = {Nature}, volume = {617}, number = {7961}, pages = {444-445}, pmid = {37165229}, issn = {1476-4687}, mesh = {Humans ; *Genetic Variation/genetics ; *Genome, Human/genetics ; *Genomics/trends ; }, } @article {pmid37165225, year = {2023}, author = {Petrić Howe, N and Bundell, S}, title = {'Pangenome' aims to capture the breadth of human diversity.}, journal = {Nature}, volume = {}, number = {}, pages = {}, doi = {10.1038/d41586-023-01579-9}, pmid = {37165225}, issn = {1476-4687}, } @article {pmid37165083, year = {2023}, author = {Hickey, G and Monlong, J and Ebler, J and Novak, AM and Eizenga, JM and Gao, Y and {Human Pangenome Reference Consortium} and Marschall, T and Li, H and Paten, B}, title = {Pangenome graph construction from genome alignments with Minigraph-Cactus.}, journal = {Nature biotechnology}, volume = {}, number = {}, pages = {}, pmid = {37165083}, issn = {1546-1696}, support = {U01 HG010971/HG/NHGRI NIH HHS/United States ; }, abstract = {Pangenome references address biases of reference genomes by storing a representative set of diverse haplotypes and their alignment, usually as a graph. Alternate alleles determined by variant callers can be used to construct pangenome graphs, but advances in long-read sequencing are leading to widely available, high-quality phased assemblies. 
Constructing a pangenome graph directly from assemblies, as opposed to variant calls, leverages the graph's ability to represent variation at different scales. Here we present the Minigraph-Cactus pangenome pipeline, which creates pangenomes directly from whole-genome alignments, and demonstrate its ability to scale to 90 human haplotypes from the Human Pangenome Reference Consortium. The method builds graphs containing all forms of genetic variation while allowing use of current mapping and genotyping tools. We measure the effect of the quality and completeness of reference genomes used for analysis within the pangenomes and show that using the CHM13 reference from the Telomere-to-Telomere Consortium improves the accuracy of our methods. We also demonstrate construction of a Drosophila melanogaster pangenome.}, } @article {pmid37164484, year = {2023}, author = {Porubsky, D and Vollger, MR and Harvey, WT and Rozanski, AN and Ebert, P and Hickey, G and Hasenfeld, P and Sanders, AD and Stober, C and {Human Pangenome Reference Consortium} and Korbel, JO and Paten, B and Marschall, T and Eichler, EE}, title = {Gaps and complex structurally variant loci in phased genome assemblies.}, journal = {Genome research}, volume = {33}, number = {4}, pages = {496-510}, pmid = {37164484}, issn = {1549-5469}, support = {R01 HG002385/HG/NHGRI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *DNA, Satellite/genetics ; *Polymorphism, Genetic ; Haplotypes ; Segmental Duplications, Genomic ; Sequence Analysis, DNA ; }, abstract = {There has been tremendous progress in phased genome assembly production by combining long-read data with parental information or linked-read data. Nevertheless, a typical phased genome assembly generated by trio-hifiasm still generates more than 140 gaps. 
We perform a detailed analysis of gaps, assembly breaks, and misorientations from 182 haploid assemblies obtained from a diversity panel of 77 unique human samples. Although trio-based approaches using HiFi are the current gold standard, chromosome-wide phasing accuracy is comparable when using Strand-seq instead of parental data. Importantly, the majority of assembly gaps cluster near the largest and most identical repeats (including segmental duplications [35.4%], satellite DNA [22.3%], or regions enriched in GA/AT-rich DNA [27.4%]). Consequently, 1513 protein-coding genes overlap assembly gaps in at least one haplotype, and 231 are recurrently disrupted or missing from five or more haplotypes. Furthermore, we estimate that 6-7 Mbp of DNA are misorientated per haplotype irrespective of whether trio-free or trio-based approaches are used. Of these misorientations, 81% correspond to bona fide large inversion polymorphisms in the human species, most of which are flanked by large segmental duplications. We also identify large-scale alignment discontinuities consistent with 11.9 Mbp of deletions and 161.4 Mbp of insertions per haploid genome. Although 99% of this variation corresponds to satellite DNA, we identify 230 regions of euchromatic DNA with frequent expansions and contractions, nearly half of which overlap with 197 protein-coding genes. 
Such variable and incompletely assembled regions are important targets for future algorithmic development and pangenome representation.}, } @article {pmid37154680, year = {2023}, author = {Castillo, AI and Almeida, RPP}, title = {The Multifaceted Role of Homologous Recombination in a Fastidious Bacterial Plant Pathogen.}, journal = {Applied and environmental microbiology}, volume = {89}, number = {5}, pages = {e0043923}, pmid = {37154680}, issn = {1098-5336}, support = {S10 OD018174/CD/ODCDC CDC HHS/United States ; }, mesh = {Phylogeny ; *Genetic Variation ; Genome, Bacterial ; *Xylella/genetics ; Homologous Recombination ; Plants/genetics ; Plant Diseases/microbiology ; }, abstract = {Homologous recombination plays a key function in the evolution of bacterial genomes. Within Xylella fastidiosa, an emerging plant pathogen with increasing host and geographic ranges, it has been suggested that homologous recombination facilitates host switching, speciation, and the development of virulence. We used 340 whole-genome sequences to study the relationship between inter- and intrasubspecific homologous recombination, random mutation, and natural selection across individual X. fastidiosa genes. Individual gene orthologs were identified and aligned, and a maximum likelihood (ML) gene tree was generated. Each gene alignment and tree pair were then used to calculate gene-wide and branch-specific r/m values (relative effect of recombination to mutation), gene-wide and branch-site nonsynonymous over synonymous substitution rates (dN/dS values; episodic selection), and branch length (as a proxy for mutation rate). The relationships between these variables were evaluated at the global level (i.e., for all genes among and within a subspecies), among specific functional classes (i.e., COGs), and between pangenome components (i.e., accessory versus core genes). Our analysis showed that r/m varied widely among genes as well as across X. fastidiosa subspecies. 
While r/m and dN/dS values were positively correlated in some instances (e.g., core genes in X. fastidiosa subsp. fastidiosa and both core and accessory genes in X. fastidiosa subsp. multiplex), low correlation coefficients suggested no clear biological significance. Overall, our results indicate that, in addition to its adaptive role in certain genes, homologous recombination acts as a homogenizing and a neutral force across phylogenetic clades, gene functional groups, and pangenome components. IMPORTANCE: There is ample evidence that homologous recombination occurs frequently in the economically important plant pathogen Xylella fastidiosa. Homologous recombination has been known to occur among sympatric subspecies and is associated with host-switching events and virulence-linked genes. As a consequence, it is generally assumed that recombinant events in X. fastidiosa are adaptive. This mindset influences expectations of how homologous recombination acts as an evolutionary force as well as how management strategies for X. fastidiosa diseases are determined. Yet, homologous recombination plays roles beyond that of a source for diversification and adaptation. Homologous recombination can act as a DNA repair mechanism, as a means to facilitate nucleotide compositional change, as a homogenization mechanism within populations, or even as a neutral force. Here, we provide a first assessment of long-held beliefs regarding the general role of recombination in adaptation for X. fastidiosa. We evaluate gene-specific variations in homologous recombination rate across three X. fastidiosa subspecies and its relationship to other evolutionary forces (e.g., natural selection, mutation, etc.). These data were used to assess the role of homologous recombination in X. 
fastidiosa evolution.}, } @article {pmid37153161, year = {2023}, author = {Saroha, T and Patil, PP and Rana, R and Kumar, R and Kumar, S and Singhal, L and Gautam, V and Patil, PB}, title = {Genomic features, antimicrobial susceptibility, and epidemiological insights into Burkholderia cenocepacia clonal complex 31 isolates from bloodstream infections in India.}, journal = {Frontiers in cellular and infection microbiology}, volume = {13}, number = {}, pages = {1151594}, pmid = {37153161}, issn = {2235-2988}, mesh = {Humans ; *Burkholderia cenocepacia/genetics ; Phylogeny ; *Burkholderia Infections/epidemiology ; *Burkholderia cepacia complex/genetics ; Genomics ; *Anti-Infective Agents ; *Sepsis ; Fibrosis ; }, abstract = {INTRODUCTION: Burkholderia cepacia complex (Bcc) clonal complex (CC) 31, the predominant lineage causing devastating outbreaks globally, has been a growing concern of infections in non-cystic fibrosis (NCF) patients in India. B. cenocepacia is very challenging to treat owing to its virulence determinants and antibiotic resistance. Improving the management of these infections requires a better knowledge of their resistance patterns and mechanisms.

METHODS: Whole-genome sequences of 35 CC31 isolates obtained from patient samples were analyzed against 210 available CC31 genomes in the NCBI database to glean details of resistance, virulence, mobile elements, and phylogenetic markers to study genomic diversity and evolution of the CC31 lineage in India.

RESULTS: Genomic analysis revealed that 35 isolates belonging to CC31 were categorized into 11 sequence types (ST), of which five STs were reported exclusively from India. Phylogenetic analysis classified 245 CC31 isolates into eight distinct clades (I-VIII) and unveiled that NCF isolates are evolving independently from the global cystic fibrosis (CF) isolates forming a distinct clade. The detection rate of seven classes of antibiotic-related genes in 35 isolates was 35 (100%) for tetracyclines, aminoglycosides, and fluoroquinolones; 26 (74.2%) for sulphonamides and phenicols; 7 (20%) for beta-lactamases; and 1 (2.8%) for trimethoprim resistance genes. Additionally, 3 (8.5%) NCF isolates were resistant to disinfecting agents and antiseptics. Antimicrobial susceptibility testing revealed that the majority of NCF isolates were resistant to chloramphenicol (77%) and levofloxacin (34%). NCF isolates have a comparable number of virulence genes to CF isolates. A well-studied pathogenicity island of B. cenocepacia, GI11 is present in ST628 and ST709 isolates from the Indian Bcc population. In contrast, genomic island GI15 (highly similar to the island found in B. pseudomallei strain EY1) is exclusively reported in ST839 and ST824 isolates from two different locations in India. Horizontal acquisition of lytic phage ST79 of pathogenic B. pseudomallei is demonstrated in ST628 isolates Bcc1463, Bcc29163, and BccR4654 amongst the CC31 lineage.

DISCUSSION: The study reveals a high diversity of CC31 lineages among B. cenocepacia isolates from India. The extensive information from this study will facilitate the development of rapid diagnostic and novel therapeutic approaches to manage B. cenocepacia infections.}, } @article {pmid37152722, year = {2023}, author = {Aziz, T and Naveed, M and Jabeen, K and Shabbir, MA and Sarwar, A and Zhennai, Y and Alharbi, M and Alshammari, A and Alasmari, AF}, title = {Integrated genome based evaluation of safety and probiotic characteristics of Lactiplantibacillus plantarum YW11 isolated from Tibetan kefir.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1157615}, pmid = {37152722}, issn = {1664-302X}, abstract = {The comparative genomic analysis of Lactiplantibacillus plantarum YW11 (L. plantarum YW11) isolated from Tibetan kefir involves comparison of the complete genome sequences of the isolated strain with other closely related L. plantarum strains. This type of analysis can be used to identify the genetic diversity among strains and to explore the genetic characteristics of the YW11 strain. The genome of L. plantarum YW11 was found to be composed of a circular single chromosome of 4,597,470 bp with a G + C content of 43.2%. A total of 4,278 open reading frames (ORFs) were identified in the genome and the coding density was found to be 87.8%. A comparative genomic analysis was conducted using two other L. plantarum strains, L. plantarum C11 and L. plantarum LMG21703. Genomic comparison revealed that L. plantarum YW11 shared 72.7 and 75.2% of gene content with L. plantarum C11 and L. plantarum LMG21703, respectively. Most of the genes shared between the three L. plantarum strains were involved in carbohydrate metabolism, energy production and conversion, amino acid metabolism, and transcription. 
In this analysis, 10 previously sequenced entire genomes of the species were compared using an in-silico technique to discover genomic divergence in genes linked with carbohydrate intake and their potential adaptations to distinct human intestinal environments. The subspecies pan-genome was open, which correlated with its extraordinary capacity to colonize several environments. Phylogenetic analysis revealed that the novel genomes were homogenously grouped among subspecies of Lactiplantibacillus. L. plantarum was resistant to cefoxitin, erythromycin, and metronidazole, inhibited pathogens including Listeria monocytogenes, Clostridium difficile, Vibrio cholerae, and others, and had excellent aerotolerance, which is useful for industrial operations. The comparative genomic analysis of L. plantarum YW11 isolated from Tibetan kefir can provide insights into the genetic characteristics of the strain, which can be used to further understand its role in the production of kefir.}, } @article {pmid37147657, year = {2023}, author = {Mun, T and Vaddadi, NSK and Langmead, B}, title = {Pangenomic genotyping with the marker array.}, journal = {Algorithms for molecular biology : AMB}, volume = {18}, number = {1}, pages = {2}, pmid = {37147657}, issn = {1748-7188}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; R35 GM139602/GM/NIGMS NIH HHS/United States ; R01HG011392/HG/NHGRI NIH HHS/United States ; R35GM139602/GM/NIGMS NIH HHS/United States ; }, abstract = {We present a new method and software tool called rowbowt that applies a pangenome index to the problem of inferring genotypes from short-read sequencing data. The method uses a novel indexing structure called the marker array. Using the marker array, we can genotype variants with respect to large panels like the 1000 Genomes Project while reducing the reference bias that results when aligning to a single linear reference. 
rowbowt can infer accurate genotypes in less time and memory compared to existing graph-based methods. The method is implemented in the open source software tool rowbowt available at https://github.com/alshai/rowbowt .}, } @article {pmid37144759, year = {2023}, author = {Basharat, Z and Meshal, A}, title = {Pan-genome mediated therapeutic target mining in Kingella kingae and inhibition assessment using traditional Chinese medicinal compounds: an informatics approach.}, journal = {Journal of biomolecular structure & dynamics}, volume = {}, number = {}, pages = {1-14}, doi = {10.1080/07391102.2023.2208221}, pmid = {37144759}, issn = {1538-0254}, abstract = {Kingella kingae causes bacteremia, endocarditis, osteomyelitis, septic arthritis, meningitis, spondylodiscitis, and lower respiratory tract infections in pediatric patients. Usually it demonstrates disease after inflammation of mouth, lips or infections of the upper respiratory tract. To date, therapeutic targets in this bacterium remain unexplored. We have utilized a battery of bioinformatics tools to mine these targets in this study. Core genes were initially inferred from 55 genomes of K. kingae and 39 therapeutic targets were mined using an in-house pipeline. We selected aroG product (KDPG aldolase) involved in chorismate pathway, for inhibition analysis of this bacterium using lead-like metabolites from traditional Chinese medicinal plants. Pharmacophore generation was done using control ZINC36444158 (1,16-bis[(dihydroxyphosphinyl)oxy]hexadecane), followed by molecular docking of top hits from a library of 36,000 compounds. Top prioritized compounds were ZINC95914016, ZINC33833283 and ZINC95914219. ADME profiling and simulation of compound dosing (100 mg tablet) was done to infer compartmental pharmacokinetics in a population of 300 individuals in fasting state. PkCSM based toxicity analysis revealed the compounds ZINC95914016 and ZINC95914219 as safe and with almost similar bioavailability. 
However, ZINC95914016 takes less time to reach maximum concentration in the plasma and shows several optimal parameters compared to other leads. In light of obtained data, we recommend this compound for further testing and induction in experimental drug design pipeline. Communicated by Ramaswamy H. Sarma.}, } @article {pmid37143156, year = {2023}, author = {Gong, Y and Li, Y and Liu, X and Ma, Y and Jiang, L}, title = {A review of the pangenome: how it affects our understanding of genomic variation, selection and breeding in domestic animals?}, journal = {Journal of animal science and biotechnology}, volume = {14}, number = {1}, pages = {73}, pmid = {37143156}, issn = {1674-9782}, support = {31961143021//Innovative Research Group Project of the National Natural Science Foundation of China/ ; CARS-39-01//Earmarked Fund for Modern Agro-industry Technology Research System/ ; ASTIP-IAS01//National Defense Science and Technology Innovation Fund of the Chinese Academy of Sciences/ ; }, abstract = {As large-scale genomic studies have progressed, it has been revealed that a single reference genome pattern cannot represent genetic diversity at the species level. Domestic animals tend to have complex routes of origin and migration, suggesting a possible omission of some population-specific sequences in the current reference genome. Conversely, the pangenome is a collection of all DNA sequences of a species that contains sequences shared by all individuals (core genome) and is also able to display sequence information unique to each individual (variable genome). The progress of pangenome research in humans, plants and domestic animals has proved that the missing genetic components and the identification of large structural variants (SVs) can be explored through pangenomic studies. Many individual specific sequences have been shown to be related to biological adaptability, phenotype and important economic traits. 
The maturity of technologies and methods such as third-generation sequencing, Telomere-to-telomere genomes, graphic genomes, and reference-free assembly will further promote the development of pangenome. In the future, pangenome combined with long-read data and multi-omics will help to resolve large SVs and their relationship with the main economic traits of interest in domesticated animals, providing better insights into animal domestication, evolution and breeding. In this review, we mainly discuss how pangenome analysis reveals genetic variations in domestic animals (sheep, cattle, pigs, chickens) and their impacts on phenotypes and how this can contribute to the understanding of species diversity. Additionally, we also go through potential issues and the future perspectives of pangenome research in livestock and poultry.}, } @article {pmid37138640, year = {2023}, author = {Sorouri, B and Rodriguez, CI and Gaut, BS and Allison, SD}, title = {Variation in Sphingomonas traits across habitats and phylogenetic clades.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1146165}, pmid = {37138640}, issn = {1664-302X}, abstract = {Whether microbes show habitat preferences is a fundamental question in microbial ecology. If different microbial lineages have distinct traits, those lineages may occur more frequently in habitats where their traits are advantageous. Sphingomonas is an ideal bacterial clade in which to investigate how habitat preference relates to traits because these bacteria inhabit diverse environments and hosts. Here we downloaded 440 publicly available Sphingomonas genomes, assigned them to habitats based on isolation source, and examined their phylogenetic relationships. We sought to address whether: (1) there is a relationship between Sphingomonas habitat and phylogeny, and (2) whether there is a phylogenetic correlation between key, genome-based traits and habitat preference. 
We hypothesized that Sphingomonas strains from similar habitats would cluster together in phylogenetic clades, and key traits that improve fitness in specific environments should correlate with habitat. Genome-based traits were categorized into the Y-A-S trait-based framework for high growth yield, resource acquisition, and stress tolerance. We selected 252 high quality genomes and constructed a phylogenetic tree with 12 well-defined clades based on an alignment of 404 core genes. Sphingomonas strains from the same habitat clustered together within the same clades, and strains within clades shared similar clusters of accessory genes. Additionally, key genome-based trait frequencies varied across habitats. We conclude that Sphingomonas gene content reflects habitat preference. This knowledge of how environment and host relate to phylogeny may also help with future functional predictions about Sphingomonas and facilitate applications in bioremediation.}, } @article {pmid37138622, year = {2023}, author = {Zhou, Y and Jiang, D and Yao, X and Luo, Y and Yang, Z and Ren, M and Zhang, G and Yu, Y and Lu, A and Wang, Y}, title = {Pan-genome wide association study of Glaesserella parasuis highlights genes associated with virulence and biofilm formation.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1160433}, pmid = {37138622}, issn = {1664-302X}, abstract = {Glaesserella parasuis is a gram-negative bacterium that causes fibrotic polyserositis and arthritis in pigs, significantly affecting the pig industry. The pan-genome of G. parasuis is open. As the number of genes increases, the core and accessory genomes may show more pronounced differences. The genes associated with virulence and biofilm formation are also still unclear due to the diversity of G. parasuis. Therefore, we have applied a pan-genome-wide association study (Pan-GWAS) to 121 strains of G. parasuis. 
Our analysis revealed that the core genome consists of 1,133 genes associated with the cytoskeleton, virulence, and basic biological processes. The accessory genome is highly variable and is a major cause of genetic diversity in G. parasuis. Furthermore, two biologically important traits (virulence, biofilm formation) of G. parasuis were studied via pan-GWAS to search for genes associated with the traits. A total of 142 genes were associated with strong virulence traits. By affecting metabolic pathways and capturing the host nutrients, these genes are involved in signal pathways and virulence factors, which are beneficial for bacterial survival and biofilm formation. This research lays the foundation for further studies on virulence and biofilm formation and provides potential new drug and vaccine targets against G. parasuis.}, } @article {pmid37138596, year = {2023}, author = {Zhao, Y and Wei, HM and Yuan, JL and Xu, L and Sun, JQ}, title = {A comprehensive genomic analysis provides insights on the high environmental adaptability of Acinetobacter strains.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1177951}, pmid = {37138596}, issn = {1664-302X}, abstract = {Acinetobacter is ubiquitous, and it has a high species diversity and a complex evolutionary pattern. To elucidate the mechanism of its high ability to adapt to various environments, 312 genomes of Acinetobacter strains were analyzed using the phylogenomic and comparative genomics methods. It was revealed that the Acinetobacter genus has an open pan-genome and strong genome plasticity. The pan-genome consists of 47,500 genes, with 818 shared by all the genomes of Acinetobacter, while 22,291 are unique genes. 
Although Acinetobacter strains do not have a complete glycolytic pathway to directly utilize glucose as carbon source, most of them harbored the n-alkane-degrading genes alkB/alkM (97.1% of tested strains) and almA (96.7% of tested strains), which were responsible for medium-and long-chain n-alkane terminal oxidation reaction, respectively. Most Acinetobacter strains also have catA (93.3% of tested strains) and benAB (92.0% of tested strains) genes that can degrade the aromatic compounds catechol and benzoic acid, respectively. These abilities enable the Acinetobacter strains to easily obtain carbon and energy sources from their environment for survival. The Acinetobacter strains can manage osmotic pressure by accumulating potassium and compatible solutes, including betaine, mannitol, trehalose, glutamic acid, and proline. They respond to oxidative stress by synthesizing superoxide dismutase, catalase, disulfide isomerase, and methionine sulfoxide reductase that repair the damage caused by reactive oxygen species. In addition, most Acinetobacter strains contain many efflux pump genes and resistance genes to manage antibiotic stress and can synthesize a variety of secondary metabolites, including arylpolyene, β-lactone and siderophores among others, to adapt to their environment. These genes enable Acinetobacter strains to survive extreme stresses. The genome of each Acinetobacter strain contained different numbers of prophages (0-12) and genomic islands (GIs) (6-70), and genes related to antibiotic resistance were found in the GIs. 
The phylogenetic analysis revealed that the alkM and almA genes have a similar evolutionary position with the core genome, indicating that they may have been acquired by vertical gene transfer from their ancestor, while catA, benA, benB and the antibiotic resistance genes could have been acquired by horizontal gene transfer from the other organisms.}, } @article {pmid37138544, year = {2023}, author = {Oddy, J and Chhetry, M and Awal, R and Addy, J and Wilkinson, M and Smith, D and King, R and Hall, C and Testa, R and Murray, E and Raffan, S and Curtis, TY and Wingen, L and Griffiths, S and Berry, S and Elmore, JS and Cryer, N and Moreira de Almeida, I and Halford, NG}, title = {Genetic control of grain amino acid composition in a UK soft wheat mapping population.}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20335}, doi = {10.1002/tpg2.20335}, pmid = {37138544}, issn = {1940-3372}, support = {BB/P016855/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/T017007/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/T50838X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {Wheat (Triticum aestivum L.) is a major source of nutrients for populations across the globe, but the amino acid composition of wheat grain does not provide optimal nutrition. The nutritional value of wheat grain is limited by low concentrations of lysine (the most limiting essential amino acid) and high concentrations of free asparagine (precursor to the processing contaminant acrylamide). There are currently few available solutions for asparagine reduction and lysine biofortification through breeding. In this study, we investigated the genetic architecture controlling grain free amino acid composition and its relationship to other traits in a Robigus × Claire doubled haploid population. 
Multivariate analysis of amino acids and other traits showed that the two groups are largely independent of one another, with the largest effect on amino acids being from the environment. Linkage analysis of the population allowed identification of quantitative trait loci (QTL) controlling free amino acids and other traits, and this was compared against genomic prediction methods. Following identification of a QTL controlling free lysine content, wheat pangenome resources facilitated analysis of candidate genes in this region of the genome. These findings can be used to select appropriate strategies for lysine biofortification and free asparagine reduction in wheat breeding programs.}, } @article {pmid37138543, year = {2023}, author = {Derbyshire, MC and Marsh, J and Tirnaz, S and Nguyen, HT and Batley, J and Bayer, PE and Edwards, D}, title = {Diversity of fatty acid biosynthesis genes across the soybean pangenome.}, journal = {The plant genome}, volume = {16}, number = {2}, pages = {e20334}, doi = {10.1002/tpg2.20334}, pmid = {37138543}, issn = {1940-3372}, mesh = {*Soybeans/genetics ; *Fatty Acid Desaturases/genetics ; Plant Proteins/genetics ; Plant Breeding ; Fatty Acids ; }, abstract = {Soybean (Glycine max) is a major crop that contributes more than half of global oilseed production. Much research has been directed towards improvement of the fatty acid profile of soybean seeds through marker assisted breeding. Recently published soybean pangenomes, based on thousands of soybean lines, provide an opportunity to identify new alleles that may be involved in fatty acid biosynthesis. In this study, we identify fatty acid biosynthesis genes in soybean pangenomes based on sequence identity with known genes and examine their sequence diversity across diverse soybean collections. 
We find three possible instances of a gene missing in wild soybean, including FAD8 and FAD2-2D, which may be involved in oleic and linoleic acid desaturation, respectively, although we recommend follow-up research to verify the absence of these genes. More than half of the 53 fatty acid biosynthesis genes identified contained missense variants, including one linked with a previously identified QTL for oil quality. These variants were present in multiple studies based on either short read mappings or alignment of reference grade genomes. Missense variants were found in previously characterized genes including FAD2-1A and FAD2-1B, both of which are involved in desaturation of oleic acid, as well as uncharacterized candidate fatty acid biosynthesis genes. We find that the frequency of missense alleles in fatty acid biosynthesis genes has been reduced significantly more than the global average frequency of missense mutations during domestication, and missense variation in some genes is near absent in modern cultivars. This could be due to the selection for fatty acid profiles in seed, though future work should be conducted towards understanding the phenotypic impacts of these variants.}, } @article {pmid37129508, year = {2023}, author = {Maki, JJ and Howard, M and Connelly, S and Pettengill, MA and Hardy, DJ and Cameron, A}, title = {Species Delineation and Comparative Genomics within the Campylobacter ureolyticus Complex.}, journal = {Journal of clinical microbiology}, volume = {61}, number = {5}, pages = {e0004623}, pmid = {37129508}, issn = {1098-660X}, mesh = {Humans ; *Campylobacter ; *Campylobacter Infections/microbiology ; Genomics ; Anti-Bacterial Agents ; *Gastroenteritis/microbiology ; DNA ; *Campylobacter jejuni/genetics ; }, abstract = {Campylobacter ureolyticus is an emerging pathogen increasingly appreciated as a common cause of gastroenteritis and extra-intestinal infections in humans. 
Outside the setting of gastroenteritis, little work has been done to describe the genomic content and relatedness of the species, especially regarding clinical isolates. We reviewed the epidemiology of clinical C. ureolyticus cultured by our institution over the past 10 years. Fifty-one unique C. ureolyticus isolates were identified between January 2010 and August 2022, mostly originating from abscesses and blood cultures. To clarify the taxonomic relationships between isolates and to attribute specific genes with different clinical manifestations, we sequenced 19 available isolates from a variety of clinical specimen types and conducted a pangenomic analysis with publicly available C. ureolyticus genomes. Digital DNA:DNA hybridization suggested that these C. ureolyticus comprised a species complex of 10 species clusters (SCs) and several subspecies clusters. Although some orthologous genes or gene functions were enriched in isolates found in different SCs and clinical specimens, no association was significant. Nearly a third of the isolates possessed antimicrobial resistance genes, including the ermA resistance gene, potentially conferring resistance to macrolides, the treatment of choice for severe human campylobacteriosis. This work effectively doubles the number of publicly available C. 
ureolyticus genomes, provides further clarification of taxonomic relationships within this bacterial complex, and identifies target SCs for future analysis.}, } @article {pmid37127330, year = {2023}, author = {Weller, CA and Andreev, I and Chambers, MJ and Park, M and {{NISC Comparative Sequencing Program}} and Bloom, JS and Sadhu, MJ}, title = {Highly complete long-read genomes reveal pangenomic variation underlying yeast phenotypic diversity.}, journal = {Genome research}, volume = {33}, number = {5}, pages = {729--740}, pmid = {37127330}, issn = {1549-5469}, support = {ZIA HG200401/ImNIH/Intramural NIH HHS/United States ; ZIB HG000196/ImNIH/Intramural NIH HHS/United States ; }, mesh = {*Saccharomyces cerevisiae/genetics ; Quantitative Trait Loci ; Chromosome Mapping ; Phenotype ; *Saccharomyces cerevisiae Proteins/genetics ; }, abstract = {Understanding the genetic causes of trait variation is a primary goal of genetic research. One way that individuals can vary genetically is through variable pangenomic genes: genes that are only present in some individuals in a population. The presence or absence of entire genes could have large effects on trait variation. However, variable pangenomic genes can be missed in standard genotyping workflows, owing to reliance on aligning short-read sequencing to reference genomes. A popular method for studying the genetic basis of trait variation is linkage mapping, which identifies quantitative trait loci (QTLs), regions of the genome that harbor causative genetic variants. Large-scale linkage mapping in the budding yeast Saccharomyces cerevisiae has found thousands of QTLs affecting myriad yeast phenotypes. To enable the resolution of QTLs caused by variable pangenomic genes, we used long-read sequencing to generate highly complete de novo genome assemblies of 16 diverse yeast isolates. 
With these assemblies, we resolved QTLs for growth on maltose, sucrose, raffinose, and oxidative stress to specific genes that are absent from the reference genome but present in the broader yeast population at appreciable frequency. Copies of genes also duplicate onto chromosomes where they are absent in the reference genome, and we found that these copies generate additional QTLs whose resolution requires pangenome characterization. Our findings show the need for highly complete genome assemblies to identify the genetic basis of trait variation.}, } @article {pmid37125195, year = {2023}, author = {Saxena, P and Rauniyar, S and Thakur, P and Singh, RN and Bomgni, A and Alaba, MO and Tripathi, AK and Gnimpieba, EZ and Lushbough, C and Sani, RK}, title = {Integration of text mining and biological network analysis: Identification of essential genes in sulfate-reducing bacteria.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1086021}, pmid = {37125195}, issn = {1664-302X}, support = {P20 GM103443/GM/NIGMS NIH HHS/United States ; P20 RR016479/RR/NCRR NIH HHS/United States ; }, abstract = {The growth and survival of an organism in a particular environment is highly depends on the certain indispensable genes, termed as essential genes. Sulfate-reducing bacteria (SRB) are obligate anaerobes which thrives on sulfate reduction for its energy requirements. The present study used Oleidesulfovibrio alaskensis G20 (OA G20) as a model SRB to categorize the essential genes based on their key metabolic pathways. Herein, we reported a feedback loop framework for gene of interest discovery, from bio-problem to gene set of interest, leveraging expert annotation with computational prediction. Defined bio-problem was applied to retrieve the genes of SRB from literature databases (PubMed, and PubMed Central) and annotated them to the genome of OA G20. 
Retrieved gene list was further used to enrich protein-protein interaction and was corroborated to the pangenome analysis, to categorize the enriched gene sets and the respective pathways under essential and non-essential. Interestingly, the sat gene (dde_2265) from the sulfur metabolism was the bridging gene between all the enriched pathways. Gene clusters involved in essential pathways were linked with the genes from seleno-compound metabolism, amino acid metabolism, secondary metabolite synthesis, and cofactor biosynthesis. Furthermore, pangenome analysis demonstrated the gene distribution, where 69.83% of the 116 enriched genes were mapped under "persistent," inferring the essentiality of these genes. Likewise, 21.55% of the enriched genes, which involves specially the formate dehydrogenases and metallic hydrogenases, appeared under "shell." Our methodology suggested that semi-automated text mining and network analysis may play a crucial role in deciphering the previously unexplored genes and key mechanisms which can help to generate a baseline prior to perform any experimental studies.}, } @article {pmid37122002, year = {2023}, author = {Porubsky, D and Harvey, WT and Rozanski, AN and Ebler, J and Höps, W and Ashraf, H and Hasenfeld, P and {{Human Genome Structural Variation Consortium (HGSVC)}} and {{Human Pangenome Reference Consortium (HPRC)}} and Paten, B and Sanders, AD and Marschall, T and Korbel, JO and Eichler, EE}, title = {Inversion polymorphism in a complete human genome assembly.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {100}, pmid = {37122002}, issn = {1474-760X}, support = {U24 HG007497/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Genome, Human ; *Polymorphism, Genetic ; Genomic Structural Variation ; Chromosome Inversion ; }, abstract = {The telomere-to-telomere (T2T) complete human reference has significantly improved our ability to characterize genome structural variation. 
To understand its impact on inversion polymorphisms, we remapped data from 41 genomes against the T2T reference genome and compared it to the GRCh38 reference. We find a ~ 21% increase in sensitivity improving mapping of 63 inversions on the T2T reference. We identify 26 misorientations within GRCh38 and show that the T2T reference is three times more likely to represent the correct orientation of the major human allele. Analysis of 10 additional samples reveals novel rare inversions at chromosomes 15q25.2, 16p11.2, 16q22.1-23.1, and 22q11.21.}, } @article {pmid37115804, year = {2023}, author = {Jacob, JJ and Pragasam, AK and Vasudevan, K and Velmurugan, A and Priya Teekaraman, M and Priya Thirumoorthy, T and Ray, P and Gupta, M and Kapil, A and Bai, SP and Nagaraj, S and Saigal, K and Chandola, TR and Thomas, M and Bavdekar, A and Ebenezer, SE and Shastri, J and De, A and Dutta, S and Alexander, AP and Koshy, RM and Jinka, DR and Singh, A and Srivastava, SK and Anandan, S and Dougan, G and John, J and Kang, G and Veeraraghavan, B and Mutreja, A}, title = {Genomic analysis unveils genome degradation events and gene flux in the emergence and persistence of S. Paratyphi A lineages.}, journal = {PLoS pathogens}, volume = {19}, number = {4}, pages = {e1010650}, pmid = {37115804}, issn = {1553-7374}, mesh = {Humans ; *Typhoid Fever/microbiology ; Salmonella typhi/genetics ; Phylogeny ; Salmonella paratyphi A/genetics ; Anti-Bacterial Agents ; Genomics ; }, abstract = {Paratyphoid fever caused by S. Paratyphi A is endemic in parts of South Asia and Southeast Asia. The proportion of enteric fever cases caused by S. Paratyphi A has substantially increased, yet only limited data is available on the population structure and genetic diversity of this serovar. We examined the phylogenetic distribution and evolutionary trajectory of S. Paratyphi A isolates collected as part of the Indian enteric fever surveillance study "Surveillance of Enteric Fever in India (SEFI)." 
In the study period (2017-2020), S. Paratyphi A comprised 17.6% (441/2503) of total enteric fever cases in India, with the isolates highly susceptible to all the major antibiotics used for treatment except fluoroquinolones. Phylogenetic analysis clustered the global S. Paratyphi A collection into seven lineages (A-G), and the present study isolates were distributed in lineages A, C and F. Our analysis highlights that the genome degradation events and gene acquisitions or losses are key molecular events in the evolution of new S. Paratyphi A lineages/sub-lineages. A total of 10 hypothetically disrupted coding sequences (HDCS) or pseudogenes-forming mutations possibly associated with the emergence of lineages were identified. The pan-genome analysis identified the insertion of P2/PSP3 phage and acquisition of IncX1 plasmid during the selection in 2.3.2/2.3.3 and 1.2.2 genotypes, respectively. We have identified six characteristic missense mutations associated with lipopolysaccharide (LPS) biosynthesis genes of S. Paratyphi A, however, these mutations confer only a low structural impact and possibly have minimal impact on vaccine effectiveness. Since S. Paratyphi A is human-restricted, high levels of genetic drift are not expected unless these bacteria transmit to naive hosts. However, public-health investigation and monitoring by means of genomic surveillance would be constantly needed to avoid S. Paratyphi A serovar becoming a public health threat similar to the S. 
Typhi of today.}, } @article {pmid37110377, year = {2023}, author = {Ariute, JC and Felice, AG and Soares, S and da Gama, MAS and de Souza, EB and Azevedo, V and Brenig, B and Aburjaile, F and Benko-Iseppon, AM}, title = {Characterization and Association of Rips Repertoire to Host Range of Novel Ralstonia solanacearum Strains by In Silico Approaches.}, journal = {Microorganisms}, volume = {11}, number = {4}, pages = {}, pmid = {37110377}, issn = {2076-2607}, abstract = {Ralstonia solanacearum species complex (RSSC) cause several phytobacteriosis in many economically important crops around the globe, especially in the tropics. In Brazil, phylotypes I and II cause bacterial wilt (BW) and are indistinguishable by classical microbiological and phytopathological methods, while Moko disease is caused only by phylotype II strains. Type III effectors of RSSC (Rips) are key molecular actors regarding pathogenesis and are associated with specificity to some hosts. In this study, we sequenced and characterized 14 newly RSSC isolates from Brazil's Northern and Northeastern regions, including BW and Moko ecotypes. Virulence and resistance sequences were annotated, and the Rips repertoire was predicted. Confirming previous studies, RSSC pangenome is open as α≅0.77. Genomic information regarding these isolates matches those for R. solanacearum in NCBI. All of them fit in phylotype II with a similarity above 96%, with five isolates in phylotype IIB and nine in phylotype IIA. Almost all R. solanacearum genomes in NCBI are actually from other species in RSSC. Rips repertoire of Moko IIB was more homogeneous, except for isolate B4, which presented ten non-shared Rips. Rips repertoire of phylotype IIA was more diverse in both Moko and BW, with 43 common shared Rips among all 14 isolates. New BW isolates shared more Rips with Moko IIA and Moko IIB than with other public BW genome isolates from Brazil. 
Rips not shared with other isolates might contribute to individual virulence, but commonly shared Rips are good avirulence candidates. The high number of Rips shared by new Moko and BW isolates suggests they are actually Moko isolates infecting solanaceous hosts. Finally, infection assays and Rips expression on different hosts are needed to better elucidate the association between Rips repertoire and host specificities.}, } @article {pmid37105244, year = {2023}, author = {Henaut-Jacobs, S and Passarelli-Araujo, H and Venancio, TM}, title = {Comparative genomics and phylogenomics of Campylobacter unveil potential novel species and provide insights into niche segregation.}, journal = {Molecular phylogenetics and evolution}, volume = {184}, number = {}, pages = {107786}, doi = {10.1016/j.ympev.2023.107786}, pmid = {37105244}, issn = {1095-9513}, mesh = {*Campylobacter/genetics ; Phylogeny ; Genome, Bacterial ; Genomics/methods ; Bacteria/genetics ; }, abstract = {Campylobacter is a bacterial genus associated with community outbreaks and gastrointestinal symptoms. Studies on Campylobacter generally focus on specific pathogenic species such as C. coli and C. jejuni. Currently, there are thousands of publicly available Campylobacter genomes, allowing a more complete assessment of the genus diversity. In this work, we report a network-based analysis of all available Campylobacter genomes to explore the genus structure and diversity, revealing potentially new species and elucidating genus features. We also hypothesize that the previously established Clade III of C. coli is in fact a novel species (referred here as Campylobacter spp12). Finally, we found a negative correlation between pangenome fluidity and saturation coefficient, with potential implications to the lifestyles of distinct Campylobacter species. 
Since pangenome analysis depends on the number of available genomes, this correlation could help estimate pangenome metrics of Campylobacter species with less sequenced genomes, helping understand their lifestyle and niche adaptation. Together, our results indicate that the Campylobacter genus should be re-evaluated, with particular attention to the interplay between genome structure and niche segregation.}, } @article {pmid37103716, year = {2023}, author = {Matussek, A and Mernelius, S and Chromek, M and Zhang, J and Frykman, A and Hansson, S and Georgieva, V and Xiong, Y and Bai, X}, title = {Genome-wide association study of hemolytic uremic syndrome causing Shiga toxin-producing Escherichia coli from Sweden, 1994-2018.}, journal = {European journal of clinical microbiology \& infectious diseases : official publication of the European Society of Clinical Microbiology}, volume = {42}, number = {6}, pages = {771--779}, pmid = {37103716}, issn = {1435-4373}, support = {SLS884041//Scandinavian Society for Antimicrobial Chemotherapy Foundation/ ; Dnr: 2022-00277//Ruth och Richard Julins Stiftelse/ ; }, mesh = {Humans ; *Shiga-Toxigenic Escherichia coli ; Genome-Wide Association Study ; *Escherichia coli Proteins/genetics ; Sweden/epidemiology ; Phylogeny ; *Escherichia coli Infections/complications/epidemiology/microbiology ; *Hemolytic-Uremic Syndrome/epidemiology/microbiology ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) infection can cause clinical manifestations ranging from diarrhea to potentially fatal hemolytic uremic syndrome (HUS). This study is aimed at identifying STEC genetic factors associated with the development of HUS in Sweden. A total of 238 STEC genomes from STEC-infected patients with and without HUS between 1994 and 2018 in Sweden were included in this study. 
Serotypes, Shiga toxin gene (stx) subtypes, and virulence genes were characterized in correlation to clinical symptoms (HUS and non-HUS), and pan-genome wide association study was performed. Sixty-five strains belonged to O157:H7, and 173 belonged to non-O157 serotypes. Our study revealed that strains of O157:H7 serotype especially clade 8 were most commonly found in patients with HUS in Sweden. stx2a and stx2a + stx2c subtypes were significantly associated with HUS. Other virulence factors associated with HUS mainly included intimin (eae) and its receptor (tir), adhesion factors, toxins, and secretion system proteins. Pangenome wide-association study identified numbers of accessory genes significantly overrepresented in HUS-STEC strains, including genes encoding outer membrane proteins, transcriptional regulators, phage-related proteins, and numerous genes related to hypothetical proteins. Whole-genome phylogeny and multiple correspondence analysis of pangenomes could not differentiate HUS-STEC from non-HUS-STEC strains. In O157:H7 cluster, strains from HUS patients clustered closely; however, no significant difference in virulence genes was found in O157 strains from patients with and without HUS. 
These results suggest that STEC strains from different phylogenetic backgrounds may independently acquire genes determining their pathogenicity and confirm that other non-bacterial factors and/or bacteria-host interaction may affect STEC pathogenesis.}, } @article {pmid37098951, year = {2023}, author = {Rodrigues, C and Lanza, VF and Peixe, L and Coque, TM and Novais, Â}, title = {Phylogenomics of Globally Spread Clonal Groups 14 and 15 of Klebsiella pneumoniae.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0339522}, pmid = {37098951}, issn = {2165-0497}, support = {POCI/01/0145/FEDER/007728//European Union/ ; PT2020 UID/MULTI/04378/2013//MEC | Fundação para a Ciência e a Tecnologia (FCT)/ ; SFRH/BD/84341/2012//MEC | Fundação para a Ciência e a Tecnologia (FCT)/ ; FEMS-RG-2014-0089//Federation of European Microbiological Societies (FEMS)/ ; 2021.02252.CEECIND/CP1662/CT0009//MEC | Fundação para a Ciência e a Tecnologia (FCT)/ ; MISTAR AC21/2 00041//European Commission (EC)/ ; //MEC | Instituto de Salud Carlos III (ISCIII)/ ; //Fundación Francisco Soria Melguizo/ ; CP22/00164//Instituto de Salud Carlos III (ISCIII)/ ; }, mesh = {Humans ; *Klebsiella pneumoniae ; Phylogeny ; Plasmids/genetics ; beta-Lactamases/genetics ; Anti-Bacterial Agents/pharmacology ; *Klebsiella Infections/epidemiology ; Microbial Sensitivity Tests ; Drug Resistance, Multiple, Bacterial/genetics ; }, abstract = {Klebsiella pneumoniae sequence type 14 (ST14) and ST15 caused outbreaks of CTX-M-15 and/or carbapenemase producers worldwide, but their phylogeny and global dynamics remain unclear. We clarified the evolution of K. pneumoniae clonal group 14 (CG14) and CG15 by analyzing the capsular locus (KL), resistome, virulome, and plasmidome of public genomes (n = 481) and de novo sequences (n = 9) representing main sublineages circulating in Portugal. CG14 and CG15 evolved independently within 6 main subclades defined according to the KL and the accessory genome. 
The CG14 (n = 65) clade was structured in two large monophyletic subclades, CG14-I (KL2, 86%) and CG14-II (KL16, 14%), whose emergences were dated to 1932 and 1911, respectively. Genes encoding extended-spectrum β-lactamase (ESBL), AmpC, and/or carbapenemases were mostly observed in CG14-I (71% versus 22%). CG15 clade (n = 170) was segregated into subclades CG15-IA (KL19/KL106, 9%), CG15-IB (variable KL types, 6%), CG15-IIA (KL24, 43%) and CG15-IIB (KL112, 37%). Most CG15 genomes carried specific GyrA and ParC mutations and emerged from a common ancestor in 1989. CTX-M-15 was especially prevalent in CG15 (68% CG15 versus 38% CG14) and in CG15-IIB (92%). Plasmidome analysis revealed 27 predominant plasmid groups (PG), including particularly pervasive and recombinant F-type (n = 10), Col (n = 10), and new plasmid types. While blaCTX-M-15 was acquired multiple times by a high diversity of F-type mosaic plasmids, other antibiotic resistance genes (ARGs) were dispersed by IncL (blaOXA-48) or IncC (blaCMY/TEM-24) plasmids. We first demonstrate an independent evolutionary trajectory for CG15 and CG14 and how the acquisition of specific KL, quinolone-resistance determining region (QRDR) mutations (CG15), and ARGs in highly recombinant plasmids could have shaped the expansion and diversification of particular subclades (CG14-I and CG15-IIA/IIB). IMPORTANCE Klebsiella pneumoniae represents a major threat in the burden of antibiotic resistance (ABR). Available studies to explain the origin, the diversity, and the evolution of certain ABR K. pneumoniae populations have mainly been focused on a few clonal groups (CGs) using phylogenetic analysis of the core genome, the accessory genome being overlooked. Here, we provide unique insights into the phylogenetic evolution of CG14 and CG15, two poorly characterized CGs which have contributed to the global dissemination of genes responsible for resistance to first-line antibiotics such as β-lactams. 
Our results point out an independent evolution of these two CGs and highlight the existence of different subclades structured by the capsular type and the accessory genome. Moreover, the contribution of a turbulent flux of plasmids (especially multireplicon F type and Col) and adaptive traits (antibiotic resistance and metal tolerance genes) to the pangenome reflect the exposure and adaptation of K. pneumoniae under different selective pressures.}, } @article {pmid37098652, year = {2023}, author = {Cui, X and Hu, M and Yao, S and Zhang, Y and Tang, M and Liu, L and Cheng, X and Tong, C and Liu, S}, title = {BnaOmics: A comprehensive platform combining pan-genome and multi-omics data from Brassica napus.}, journal = {Plant communications}, volume = {4}, number = {5}, pages = {100609}, pmid = {37098652}, issn = {2590-3462}, mesh = {*Brassica napus/genetics ; Multiomics ; Chromosome Mapping ; Genome, Plant/genetics ; }, } @article {pmid37098416, year = {2023}, author = {Gong, H and Huang, X and Zhu, W and Chen, J and Huang, Y and Zhao, Z and Weng, J and Che, Y and Wang, J and Wang, X}, title = {Pan-genome analysis of the Burkholderia gladioli PV. Cocovenenans reveal the extent of variation in the toxigenic gene cluster.}, journal = {Food microbiology}, volume = {113}, number = {}, pages = {104249}, doi = {10.1016/j.fm.2023.104249}, pmid = {37098416}, issn = {1095-9998}, mesh = {Humans ; *Burkholderia gladioli/genetics ; Bongkrekic Acid/analysis ; Multigene Family ; *Foodborne Diseases/microbiology ; }, abstract = {Burkholderia gladioli has been reported as the pathogen responsible for cases of foodborne illness in many countries. The poisonous bongkrekic acid (BA) produced by B. gladioli was linked to a gene cluster absent in non-pathogenic strains. 
The whole genome sequence of eight bacteria strains, which were screened from the collected 175 raw food and environmental samples, were assembled and analyzed to detect a significant association of 19 protein-coding genes with the pathogenic status. Except for the common BA synthesis-related gene, several other genes, including the toxin-antitoxin genes, were also absent in the non-pathogenic strains. The bacteria strains with the BA gene cluster were found to form a single cluster in the analysis of all B. gladioli genome assemblies for the variants in the gene cluster. Divergence of this cluster was detected in the analysis for both the flanking sequences and those of the whole genome level, which indicates its complex origin. Genome recombination was found to cause a precise sequence deletion in the gene cluster region, which was found to be predominant in the non-pathogenic strains indicating the possible effect of horizontal gene transfer. Our study provided new information and resources for understanding the evolution and divergence of the B. gladioli species.}, } @article {pmid37093956, year = {2023}, author = {Baumdicker, F and Kupczok, A}, title = {Tackling the Pangenome Dilemma Requires the Concerted Analysis of Multiple Population Genetic Processes.}, journal = {Genome biology and evolution}, volume = {15}, number = {5}, pages = {}, pmid = {37093956}, issn = {1759-6653}, mesh = {Humans ; *Prokaryotic Cells ; *Gene Transfer, Horizontal ; Computer Simulation ; Mutation ; }, abstract = {The pangenome is the set of all genes present in a prokaryotic population. Most pangenomes contain many accessory genes of low and intermediate frequencies. Different population genetics processes contribute to the shape of these pangenomes, namely selection and fitness-independent processes such as gene transfer, gene loss, and migration. However, their relative importance is unknown and highly debated. 
Here, we argue that the debate around prokaryotic pangenomes arose due to the imprecise application of population genetics models. Most importantly, two different processes of horizontal gene transfer act on prokaryotic populations, which are frequently confused, despite their fundamentally different behavior. Genes acquired from distantly related organisms (termed here acquiring gene transfer) are most comparable to mutation in nucleotide sequences. In contrast, gene gain within the population (termed here spreading gene transfer) has an effect on gene frequencies that is identical to the effect of positive selection on single genes. We thus show that selection and fitness-independent population genetic processes affecting pangenomes are indistinguishable at the level of single gene dynamics. Nevertheless, population genetics processes are fundamentally different when considering the joint distribution of all accessory genes across individuals of a population. We propose that, to understand to which degree the different processes shaped pangenome diversity, the development of comprehensive models and simulation tools is mandatory. Furthermore, we need to identify summary statistics and measurable features that can distinguish between the processes, where considering the joint distribution of accessory genes across individuals of a population will be particularly relevant.}, } @article {pmid37089548, year = {2023}, author = {Zhong, H and Zheng, N and Wang, J and Zhao, S}, title = {Isolation and pan-genome analysis of {Enterobacter} hormaechei {Z129}, a ureolytic bacterium, from the rumen of dairy cow.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1169973}, pmid = {37089548}, issn = {1664-302X}, abstract = {INTRODUCTION: Urea is an important non-protein nitrogen source for ruminants. 
In the rumen, ureolytic bacteria play critical roles in urea-nitrogen metabolism, however, a few ureolytic strains have been isolated and genomically sequenced. The purpose of this study was to isolate a novel ureolytic bacterial strain from cattle rumen and characterize its genome and function.

METHODS: The ureolytic bacterium was isolated using an anaerobic medium with urea and phenol red as a screening indicator from the rumen fluid of dairy cattle. The genome of isolates was sequenced, assembled, annotated, and comparatively analyzed. The pan-genome analysis was performed using IPGA and the biochemical activity was also analyzed by test kits.

RESULTS: A gram-positive ureolytic strain was isolated. Its genome had a length of 4.52 Mbp and predicted genes of 4223. The 16S rRNA gene and genome GTDB-Tk taxonomic annotation showed that it was a novel strain of Enterobacter hormaechei, and it was named E. hormaechei Z129. The pan-genome analysis showed that Z129 had the highest identity to E. hormaechei ATCC 49162 with a genome average nucleotide identity of 98.69% and possessed 238 unique genes. Strain Z129 was the first E. hormaechei strain isolated from the rumen as we know. The functional annotation of the Z129 genome showed genes related to urea metabolism, including urea transport (urtA-urtE), nickel ion transport (ureJ, tonB, nixA, exbB, exbD, and rcnA), urease activation (ureA-ureG) and ammonia assimilation (gdhA, glnA, glnB, glnE, glnL, glsA, gltB, and gltD) were present. Genes involved in carbohydrate metabolism were also present, including starch hydrolysis (amyE), cellulose hydrolysis (celB and bglX), xylose transport (xylF-xylH) and glycolysis (pgi, pgk, fbaA, eno, pfkA, gap, pyk, gpmL). Biochemical activity analysis showed that Z129 was positive for alkaline phosphatase, leucine arylamidase, acid phosphatase, naphthol-AS-BI-phosphohydrolase, α-glucosidase, β-glucosidase, and pyrrolidone arylaminase, and had the ability to use D-ribose, L-arabinose, and D-lactose. Urea-nitrogen hydrolysis rate of Z129 reached 55.37% at 48 h of incubation.

DISCUSSION: Therefore, the isolated novel ureolytic strain E. hormaechei Z129 had diverse nitrogen and carbon metabolisms, and is a preferred model to study the urea hydrolysis mechanism in the rumen.}, } @article {pmid37084119, year = {2023}, author = {Williams, AN and Croxen, MA and Demczuk, WHB and Martin, I and Tyrrell, GJ}, title = {Genomic characterization of emerging invasive {Streptococcus} agalactiae serotype {VIII} in {Alberta}, {Canada}.}, journal = {European journal of clinical microbiology \& infectious diseases : official publication of the European Society of Clinical Microbiology}, volume = {42}, number = {6}, pages = {747--757}, pmid = {37084119}, issn = {1435-4373}, support = {RCP-19-003-MIF//Ministry of Economic Development and Trade, Government of Alberta/ ; }, mesh = {Infant, Newborn ; Humans ; Female ; Pregnancy ; Aged ; Serogroup ; *Clindamycin/therapeutic use ; Streptococcus agalactiae ; *Streptococcal Infections/microbiology ; Alberta/epidemiology ; Phylogeny ; Multilocus Sequence Typing ; Drug Resistance, Bacterial ; Anti-Bacterial Agents/pharmacology/therapeutic use ; Erythromycin/therapeutic use ; Genomics ; Microbial Sensitivity Tests ; }, abstract = {Invasive Group B Streptococcus (GBS) can infect pregnant women, neonates, and older adults. Invasive GBS serotype VIII is infrequent in Alberta; however, cases have increased in recent years. Here, genomic analysis was used to characterize fourteen adult invasive serotype VIII isolates from 2009 to 2021. Trends in descriptive clinical data and antimicrobial susceptibility results were evaluated for invasive serotype VIII isolates from Alberta. Isolate genomes were sequenced and subjected to molecular sequence typing, virulence and antimicrobial resistance gene identification, phylogenetic analysis, and pangenome determination. Multilocus sequencing typing identified eight ST42 (Clonal Complex; CC19), four ST1 (CC1), and two ST2 (CC1) profiles. 
Isolates were susceptible to penicillin, erythromycin, chloramphenicol, and clindamycin, apart from one isolate that displayed erythromycin and inducible clindamycin resistance. All isolates carried genes for peptide antibiotic resistance, three isolates for tetracycline resistance, and one for macrolide, lincosamide, and streptogramin resistance. All genomes carried targets currently being considered for protein-based vaccines (e.g., pili and/or Alpha family proteins). Overall, invasive GBS serotype VIII is emerging in Alberta, primarily due to ST42. Characterization and continued surveillance of serotype VIII will be important for outbreak prevention, informing vaccine development, and contributing to our understanding of the global epidemiology of this rare serotype.}, } @article {pmid37082513, year = {2022}, author = {Gangurde, SS and Xavier, A and Naik, YD and Jha, UC and Rangari, SK and Kumar, R and Reddy, MSS and Channale, S and Elango, D and Mir, RR and Zwart, R and Laxuman, C and Sudini, HK and Pandey, MK and Punnuri, S and Mendu, V and Reddy, UK and Guo, B and Gangarao, NVPR and Sharma, VK and Wang, X and Zhao, C and Thudi, M}, title = {Two decades of association mapping: Insights on disease resistance in major crops.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1064059}, pmid = {37082513}, issn = {1664-462X}, abstract = {Climate change across the globe has an impact on the occurrence, prevalence, and severity of plant diseases. About 30% of yield losses in major crops are due to plant diseases; emerging diseases are likely to worsen the sustainable production in the coming years. Plant diseases have led to increased hunger and mass migration of human populations in the past, thus a serious threat to global food security. Equipping the modern varieties/hybrids with enhanced genetic resistance is the most economic, sustainable and environmentally friendly solution. 
Plant geneticists have done tremendous work in identifying stable resistance in primary genepools and many times other than primary genepools to breed resistant varieties in different major crops. Over the last two decades, the availability of crop and pathogen genomes due to advances in next generation sequencing technologies improved our understanding of trait genetics using different approaches. Genome-wide association studies have been effectively used to identify candidate genes and map loci associated with different diseases in crop plants. In this review, we highlight successful examples for the discovery of resistance genes to many important diseases. In addition, major developments in association studies, statistical models and bioinformatic tools that improve the power, resolution and the efficiency of identifying marker-trait associations. Overall this review provides comprehensive insights into the two decades of advances in GWAS studies and discusses the challenges and opportunities this research area provides for breeding resistant varieties.}, } @article {pmid37074150, year = {2023}, author = {Pugh, HL and Connor, C and Siasat, P and McNally, A and Blair, JMA}, title = {E. coli {ST11} ({O157:H7}) does not encode a functional {AcrF} efflux pump.}, journal = {Microbiology (Reading, England)}, volume = {169}, number = {4}, pages = {}, pmid = {37074150}, issn = {1465-2080}, support = {BB/M01116X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; MR/N013913/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Humans ; *Escherichia coli/genetics/metabolism ; Membrane Transport Proteins/genetics/metabolism ; Anti-Bacterial Agents/pharmacology/metabolism ; *Escherichia coli Proteins/genetics/metabolism ; Multidrug Resistance-Associated Proteins/metabolism ; Membrane Proteins/metabolism ; }, abstract = {Escherichia coli is a facultative anaerobe found in a wide range of environments. 
Commonly described as the laboratory workhorse, E. coli is one of the best characterized bacterial species to date, however much of our understanding comes from studies involving the laboratory strain E. coli K-12. Resistance-nodulation-division efflux pumps are found in Gram-negative bacteria and can export a diverse range of substrates, including antibiotics. E. coli K-12 has six RND pumps; AcrB, AcrD, AcrF, CusA, MdtBC and MdtF, and it is frequently reported that all E. coli strains possess these six pumps. However, this is not true of E. coli ST11, a lineage of E. coli, which is primarily composed of the highly virulent important human pathogen, E. coli O157:H7. Here we show that acrF is absent from the pangenome of ST11 and that this lineage of E. coli has a highly conserved insertion within the acrF gene, which when translated encodes 13 amino acids and two stop codons. This insertion was found to be present in 97.59 % of 1787 ST11 genome assemblies. Non-function of AcrF in ST11 was confirmed in the laboratory as complementation with acrF from ST11 was unable to restore AcrF function in E. coli K-12 substr. MG1655 ΔacrB ΔacrF. 
This shows that the complement of RND efflux pumps present in laboratory bacterial strains may not reflect the situation in virulent strains of bacterial pathogens.}, } @article {pmid37072518, year = {2023}, author = {Eisenstein, M}, title = {Every base everywhere all at once: pangenomics comes of age.}, journal = {Nature}, volume = {616}, number = {7957}, pages = {618--620}, pmid = {37072518}, issn = {1476-4687}, mesh = {*Genomics/methods/standards/trends ; Species Specificity ; *Genome/genetics ; *Genetic Variation ; }, } @article {pmid37066137, year = {2023}, author = {Garrison, E and Guarracino, A and Heumos, S and Villani, F and Bao, Z and Tattini, L and Hagmann, J and Vorbrugg, S and Marco-Sola, S and Kubica, C and Ashbrook, DG and Thorell, K and Rusholme-Pilcher, RL and Liti, G and Rudbeck, E and Nahnsen, S and Yang, Z and Moses, MN and Nobrega, FL and Wu, Y and Chen, H and de Ligt, J and Sudmant, PH and Soranzo, N and Colonna, V and Williams, RW and Prins, P}, title = {Building pangenome graphs.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, doi = {10.1101/2023.04.05.535718}, pmid = {37066137}, abstract = {Pangenome graphs can represent all variation between multiple genomes, but existing methods for constructing them are biased due to reference-guided approaches. In response, we have developed PanGenome Graph Builder (PGGB), a reference-free pipeline for constructing unbiased pangenome graphs. 
PGGB uses all-to-all whole-genome alignments and learned graph embeddings to build and iteratively refine a model in which we can identify variation, measure conservation, detect recombination events, and infer phylogenetic relationships.}, } @article {pmid37065164, year = {2023}, author = {Wan, X and Takala, TM and Huynh, VA and Ahonen, SL and Paulin, L and Björkroth, J and Sironen, T and Kant, R and Saris, P}, title = {Comparative genomics of 40 {Weissella} paramesenteroides strains.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1128028}, pmid = {37065164}, issn = {1664-302X}, abstract = {Weissella strains are often detected in spontaneously fermented foods. Because of their abilities to produce lactic acid and functional exopolysaccharides as well as their probiotic traits, Weissella spp. improve not only the sensorial properties but also nutritional values of the fermented food products. However, some Weissella species have been associated with human and animal diseases. In the era of vast genomic sequencing, new genomic/genome data are becoming available to the public on daily pace. Detailed genomic analyses are due to provide a full understanding of individual Weissella species. In this study, the genomes of six Weissella paramesenteroides strains were de novo sequenced. The genomes of 42 W. paramesenteroides strains were compared to discover their metabolic and functional potentials in food fermentation. Comparative genomics and metabolic pathway reconstructions revealed that W. paramesenteroides is a compact group of heterofermentative bacteria with good capacity of producing secondary metabolites and vitamin Bs. Since the strains rarely harbored plasmid DNA, they did not commonly possess the genes associated with bacteriocin production. All 42 strains were shown to bear vanT gene from the glycopeptide resistance gene cluster vanG. 
Yet none of the strains carried virulence genes.}, } @article {pmid37059810, year = {2023}, author = {Olson, ND and Wagner, J and Dwarshuis, N and Miga, KH and Sedlazeck, FJ and Salit, M and Zook, JM}, title = {Variant calling and benchmarking in an era of complete human genome sequences.}, journal = {Nature reviews. Genetics}, volume = {24}, number = {7}, pages = {464--483}, pmid = {37059810}, issn = {1471-0064}, mesh = {Humans ; *Genome, Human ; *Benchmarking ; Genomics ; Sequence Analysis, DNA ; High-Throughput Nucleotide Sequencing ; }, abstract = {Genetic variant calling from DNA sequencing has enabled understanding of germline variation in hundreds of thousands of humans. Sequencing technologies and variant-calling methods have advanced rapidly, routinely providing reliable variant calls in most of the human genome. We describe how advances in long reads, deep learning, de novo assembly and pangenomes have expanded access to variant calls in increasingly challenging, repetitive genomic regions, including medically relevant regions, and how new benchmark sets and benchmarking methods illuminate their strengths and limitations. 
Finally, we explore the possible future of more complete characterization of human genome variation in light of the recent completion of a telomere-to-telomere human genome reference assembly and human pangenomes, and we consider the innovations needed to benchmark their newly accessible repetitive regions and complex variants.}, } @article {pmid37052486, year = {2023}, author = {Miranda, RP and Turrini, PCG and Bonadio, DT and Zerillo, MM and Berselli, AP and Creste, S and Van Sluys, MA}, title = {Genome Organization of Four {Brazilian} {Xanthomonas} albilineans Strains Does Not Correlate with Aggressiveness.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0280222}, pmid = {37052486}, issn = {2165-0497}, support = {310779/2017-0//Conselho Nacional de Desenvolvimento Científico e Tecnológico (CNPq)/ ; //Conselho Nacional de Desenvolvimento Científico e Tecnológico (CNPq)/ ; Financial code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior (CAPES)/ ; 2008/52074-0//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; 2016/17545-8//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; 2018/24646-0//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; 2018/23646-7//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; 2019/05424-0//Fundação de Amparo à Pesquisa do Estado de São Paulo (FAPESP)/ ; }, mesh = {*Genome, Bacterial ; Brazil ; Base Sequence ; Genomics ; *Xanthomonas/genetics/metabolism ; }, abstract = {An integrative approach combining genomics, transcriptomics, and cell biology is presented to address leaf scald disease, a major problem for the sugarcane industry. To gain insight into the biology of the causal agent, the complete genome sequences of four Brazilian Xanthomonas albilineans strains with differing virulence capabilities are presented and compared to the GPEPC73 reference strain and FJ1. 
Based on the aggressiveness index, different strains were compared: Xa04 and Xa11 are highly aggressive, Xa26 is intermediate, and Xa21 is the least, while, based on genome structure, Xa04 shares most of its genomic features with Xa26, and Xa11 share most of its genomic features with Xa21. In addition to presenting more clustered regularly interspaced short palindromic repeats (CRISPR) clusters, four more novel prophage insertions are present than the previously sequenced GPEPC73 and FJ1 strains. Incorporating the aggressiveness index and in vitro cell biology into these genome features indicates that disease establishment is not a result of a single determinant factor, as in most other Xanthomonas species. The Brazilian strains lack the previously described plasmids but present more prophage regions. In pairs, the most virulent and the least virulent share unique prophages. In vitro transcriptomics shed light on the 54 most highly expressed genes among the 4 strains compared to ribosomal proteins (RPs), of these, 3 outer membrane proteins. Finally, comparative albicidin inhibition rings and in vitro growth curves of the four strains also do not correlate with pathogenicity. In conclusion, the results disclose that leaf scald disease is not associated with a single shared characteristic between the most or the least pathogenic strains. IMPORTANCE An integrative approach is presented which combines genomics, transcriptomics, and cell biology to address leaf scald disease. The results presented here disclose that the disease is not associated with a single shared characteristic between the most pathogenic strains or a unique genomic pattern. Sequence data from four Brazilian strains are presented that differ in pathogenicity index: Xa04 and Xa11 are highly virulent, Xa26 is intermediate, and Xa21 is the least pathogenic strain, while, based on genome structure, Xa04 shares with Xa26, and Xa11 shares with X21 most of the genome features. 
Other than presenting more CRISPR clusters and prophages than the previously sequenced strains, the integration of aggressiveness and cell biology points out that disease establishment is not a result of a single determinant factor as in other xanthomonads.}, } @article {pmid37047101, year = {2023}, author = {Tenea, GN}, title = {Metabiotics Signature through Genome Sequencing and In Vitro Inhibitory Assessment of a Novel {Lactococcus} lactis Strain {UTNCys6-1} Isolated from {Amazonian} Camu-Camu Fruits.}, journal = {International journal of molecular sciences}, volume = {24}, number = {7}, pages = {}, pmid = {37047101}, issn = {1422-0067}, support = {1034/2022//Universidad Técnica del Norte/ ; }, mesh = {Fruit/chemistry ; *Lactococcus lactis/genetics/metabolism ; RNA, Ribosomal, 16S/genetics ; Base Sequence ; *Bacteriocins/metabolism ; Anti-Bacterial Agents/metabolism ; }, abstract = {Metabiotics are the structural components of probiotic bacteria, functional metabolites, and/or signaling molecules with numerous beneficial properties. A novel Lactococcus lactis strain, UTNCys6-1, was isolated from wild Amazonian camu-camu fruits (Myrciaria dubia), and various functional metabolites with antibacterial capacity were found. The genome size is 2,226,248 base pairs, and it contains 2248 genes, 2191 protein-coding genes (CDSs), 50 tRNAs, 6 rRNAs, 1 16S rRNA, 1 23S rRNA, and 1 tmRNA. The average GC content is 34.88%. In total, 2148 proteins have been mapped to the EggNOG database. The specific annotation consisted of four incomplete prophage regions, one CRISPR-Cas array, six genomic islands (GIs), four insertion sequences (ISs), and four regions of interest (AOI regions) spanning three classes of bacteriocins (enterolysin_A, nisin_Z, and sactipeptides). Based on pangenome analysis, there were 6932 gene clusters, of which 751 (core genes) were commonly observed within the 11 lactococcal strains. 
Among them, 3883 were sample-specific genes (cloud genes) and 2298 were shell genes, indicating high genetic diversity. A sucrose transporter of the SemiSWEET family (PTS system: phosphoenolpyruvate-dependent transport system) was detected in the genome of UTNCys6-1 but not the other 11 lactococcal strains. In addition, the metabolic profile, antimicrobial susceptibility, and inhibitory activity of both protein-peptide extract (PPE) and exopolysaccharides (EPSs) against several foodborne pathogens were assessed in vitro. Furthermore, UTNCys6-1 was predicted to be a non-human pathogen that was unable to tolerate all tested antibiotics except gentamicin; metabolized several substrates; and lacks virulence factors (VFs), genes related to the production of biogenic amines, and acquired antibiotic resistance genes (ARGs). Overall, this study highlighted the potential of this strain for producing bioactive metabolites (PPE and EPSs) for agri-food and pharmaceutical industry use.}, } @article {pmid37042769, year = {2023}, author = {Ma, X and Sun, T and Zhou, J and Zhi, M and Shen, S and Wang, Y and Gu, X and Li, Z and Gao, H and Wang, P and Feng, Q}, title = {Pangenomic Study of {Fusobacterium} nucleatum Reveals the Distribution of Pathogenic Genes and Functional Clusters at the Subspecies and Strain Levels.}, journal = {Microbiology spectrum}, volume = {11}, number = {3}, pages = {e0518422}, pmid = {37042769}, issn = {2165-0497}, mesh = {*Fusobacterium nucleatum/genetics ; Phylogeny ; *Genomics ; Base Sequence ; Virulence Factors/genetics ; }, abstract = {Fusobacterium nucleatum is a prevalent periodontal pathogen and is associated with many systemic diseases. Our knowledge of the genomic characteristics and pathogenic effectors of different F. nucleatum strains is limited. In this study, we completed the whole genome assembly of the 4 F. nucleatum strains and carried out a comprehensive pangenomic study of 30 strains with their complete genome sequences. 
Phylogenetic analysis revealed that the F. nucleatum strains are mainly divided into 4 subspecies, while 1 of the sequenced strains was classified into a new subspecies. Gene composition analysis revealed that a total of 517 "core/soft-core genes" with housekeeping functions widely distributed in almost all the strains. Each subspecies had a unique gene cluster shared by strains within the subspecies. Analysis of the virulence factors revealed that many virulence factors were widely distributed across all the strains, with some present in multiple copies. Some virulence genes showed no consistent occurrence rule at the subspecies level and were specifically distributed in certain strains. The genomic islands mainly revealed strain-specific characteristics instead of subspecies level consistency, while CRISPR types and secondary metabolite biosynthetic gene clusters were identically distributed in F. nucleatum strains from the same subspecies. The variation in amino acid sites in the adhesion protein FadA did not affect the monomer and dimer 3D structures, but it may affect the binding surface and the stability of binding to host receptors. This study provides a basis for the pathogenic study of F. nucleatum at the subspecies and strain levels. IMPORTANCE We used F. nucleatum as an example to analyze the genomic characteristics of oral pathogens at the species, subspecies, and strain levels and elucidate the similarities and differences in functional genes and virulence factors among different subspecies/strains of the same oral pathogen. We believe that the unique biological characteristics of each subspecies/strain can be attributed to the differences in functional gene clusters or the presence/absence of certain virulence genes. This study showed that F. 
nucleatum strains from the same subspecies had similar functional gene compositions, CRISPR types, and secondary metabolite biosynthetic gene clusters, while pathogenic genes, such as virulence genes, antibiotic resistance genes, and GIs, had more strain level specificity. The findings of this study suggest that, for microbial pathogenicity studies, we should carefully consider the subspecies/strains being used, as different strains may vary greatly.}, } @article {pmid37037626, year = {2023}, author = {Lu, TY and Smaruj, PN and Fudenberg, G and Mancuso, N and Chaisson, MJP}, title = {The motif composition of variable number tandem repeats impacts gene expression.}, journal = {Genome research}, volume = {33}, number = {4}, pages = {511--524}, pmid = {37037626}, issn = {1549-5469}, support = {R01 HG012133/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; U24 HG007497/HG/NHGRI NIH HHS/United States ; R01 HG011649/HG/NHGRI NIH HHS/United States ; R01 GM140287/GM/NIGMS NIH HHS/United States ; }, mesh = {Humans ; *Minisatellite Repeats/genetics ; Phenotype ; Haplotypes ; Gene Expression ; *Adenosine Triphosphatases/genetics ; Ubiquitin-Protein Ligases/genetics ; }, abstract = {Understanding the impact of DNA variation on human traits is a fundamental question in human genetics. Variable number tandem repeats (VNTRs) make up ∼3% of the human genome but are often excluded from association analysis owing to poor read mappability or divergent repeat content. Although methods exist to estimate VNTR length from short-read data, it is known that VNTRs vary in both length and repeat (motif) composition. Here, we use a repeat-pangenome graph (RPGG) constructed on 35 haplotype-resolved assemblies to detect variation in both VNTR length and repeat composition. 
We align population-scale data from the Genotype-Tissue Expression (GTEx) Consortium to examine how variations in sequence composition may be linked to expression, including cases independent of overall VNTR length. We find that 9422 out of 39,125 VNTRs are associated with nearby gene expression through motif variations, of which only 23.4% are accessible from length. Fine-mapping identifies 174 genes to be likely driven by variation in certain VNTR motifs and not overall length. We highlight two genes, CACNA1C and RNF213, that have expression associated with motif variation, showing the utility of RPGG analysis as a new approach for trait association in multiallelic and highly variable loci.}, } @article {pmid37029275, year = {2023}, author = {}, title = {Tomato super-pangenome highlights the potential use of wild relatives in tomato breeding.}, journal = {Nature genetics}, volume = {55}, number = {5}, pages = {744--745}, pmid = {37029275}, issn = {1546-1718}, mesh = {*Solanum lycopersicum/genetics ; Plant Breeding ; Chromosome Mapping ; Genotype ; }, } @article {pmid37025802, year = {2023}, author = {De Mesa, CA and Mendoza, RM and Penir, SMU and de la Peña, LD and Amar, EC and Saloma, CP}, title = {Genomic analysis of {Vibrio} harveyi strain {PH1009}, a potential multi-drug resistant pathogen due to acquisition of toxin genes.}, journal = {Heliyon}, volume = {9}, number = {4}, pages = {e14926}, pmid = {37025802}, issn = {2405-8440}, abstract = {In has increasingly been observed that viral and bacterial coinfection frequently occurs among cultured shrimp and this coinfection could exacerbate the disease phenotype. Here, we describe a newly discovered bacterial strain, Vibrio harveyi PH1009 collected from Masbate Island, Philippines that was found to be co-infecting with the White Spot Syndrome virus in a sample of black tiger prawn, Penaeus monodon. The genome of V. harveyi PH1009 was sequenced, assembled, and annotated. 
Average Nucleotide identity calculation with Vibrio harveyi strains confirmed its taxonomic identity. It is a potential multi-drug and multi-heavy metal resistant strain based on the multiple antibiotic and heavy metal resistance determinants annotated on its genome. Two prophage regions were identified in its genome. One contained genes for Zona occludens toxin (Zot) and Accessory cholera toxin (Ace), essential toxins of toxigenic V. cholerae strains apart from CTX toxins. Pan-genome analysis of V. harveyi strains, including PH1009, revealed an "open" pan-genome for V. harveyi and a core genome mainly composed of genes necessary for growth and metabolism. Phylogenetic tree based on the core genome alignment revealed that PH1009 was closest to strains QT520, CAIM 1754, and 823tez1. Published virulence factors present on the strain QT520 suggest similar pathogenicity with PH1009. However, PH1009 Zot was not found on related strains but was present in strains HENC-01 and CAIM 148. Most unique genes found in the PH1009 strain were identified as hypothetical proteins. Further annotation showed that several of these hypothetical proteins were phage transposases, integrases, and transcription regulators, implying the role of bacteriophages in the distinct genomic features of the PH1009 genome. 
The PH1009 genome will serve as a valuable genomic resource for comparative genomic studies and in understanding the disease mechanism of the Vibrio harveyi species.}, } @article {pmid37024581, year = {2023}, author = {Li, N and He, Q and Wang, J and Wang, B and Zhao, J and Huang, S and Yang, T and Tang, Y and Yang, S and Aisimutuola, P and Xu, R and Hu, J and Jia, C and Ma, K and Li, Z and Jiang, F and Gao, J and Lan, H and Zhou, Y and Zhang, X and Huang, S and Fei, Z and Wang, H and Li, H and Yu, Q}, title = {Super-pangenome analyses highlight genomic diversity and structural variation across wild and cultivated tomato species.}, journal = {Nature genetics}, volume = {55}, number = {5}, pages = {852--860}, pmid = {37024581}, issn = {1546-1718}, mesh = {*Solanum lycopersicum/genetics ; Genome-Wide Association Study ; Genome, Plant/genetics ; Plant Breeding ; *Solanum/genetics ; Genomics ; }, abstract = {Effective utilization of wild relatives is key to overcoming challenges in genetic improvement of cultivated tomato, which has a narrow genetic basis; however, current efforts to decipher high-quality genomes for tomato wild species are insufficient. Here, we report chromosome-scale tomato genomes from nine wild species and two cultivated accessions, representative of Solanum section Lycopersicon, the tomato clade. Together with two previously released genomes, we elucidate the phylogeny of Lycopersicon and construct a section-wide gene repertoire. We reveal the landscape of structural variants and provide entry to the genomic diversity among tomato wild relatives, enabling the discovery of a wild tomato gene with the potential to increase yields of modern cultivated tomatoes. Construction of a graph-based genome enables structural-variant-based genome-wide association studies, identifying numerous signals associated with tomato flavor-related traits and fruit metabolites. 
The tomato super-pangenome resources will expedite biological studies and breeding of this globally important crop.}, } @article {pmid37023146, year = {2023}, author = {Hochhauser, D and Millman, A and Sorek, R}, title = {The defense island repertoire of the Escherichia coli pan-genome.}, journal = {PLoS genetics}, volume = {19}, number = {4}, pages = {e1010694}, pmid = {37023146}, issn = {1553-7404}, mesh = {*Escherichia coli/genetics ; *Genome, Bacterial/genetics ; Bacteria/genetics ; }, abstract = {It has become clear in recent years that anti-phage defense systems cluster non-randomly within bacterial genomes in so-called "defense islands". Despite serving as a valuable tool for the discovery of novel defense systems, the nature and distribution of defense islands themselves remain poorly understood. In this study, we comprehensively mapped the defense system repertoire of >1,300 strains of Escherichia coli, the most widely studied organism for phage-bacteria interactions. We found that defense systems are usually carried on mobile genetic elements including prophages, integrative conjugative elements and transposons, which preferentially integrate at several dozens of dedicated hotspots in the E. coli genome. Each mobile genetic element type has a preferred integration position but can carry a diverse variety of defensive cargo. On average, an E. coli genome has 4.7 hotspots occupied by defense system-containing mobile elements, with some strains possessing up to eight defensively occupied hotspots. Defense systems frequently co-localize with other systems on the same mobile genetic element, in agreement with the observed defense island phenomenon. Our data show that the overwhelming majority of the E. 
coli pan-immune system is carried on mobile genetic elements, explaining why the immune repertoire varies substantially between different strains of the same species.}, } @article {pmid37019751, year = {2023}, author = {Dart, E and Ahlgren, NA}, title = {New tRNA-targeting transposons that hijack phage and vesicles.}, journal = {Trends in genetics : TIG}, volume = {39}, number = {6}, pages = {433--435}, doi = {10.1016/j.tig.2023.03.004}, pmid = {37019751}, issn = {0168-9525}, mesh = {*Bacteriophages/genetics ; Gene Transfer, Horizontal/genetics ; *Cyanobacteria/genetics ; RNA, Transfer/genetics ; Genomic Islands ; }, abstract = {Genomic islands are hotspots for horizontal gene transfer (HGT) in bacteria, but, for Prochlorococcus, an abundant marine cyanobacterium, how these islands form has puzzled scientists. With the discovery of tycheposons, a new family of transposons, Hackl et al. provide evidence for elegant new mechanisms of gene rearrangement and transfer among Prochlorococcus and bacteria more broadly.}, } @article {pmid37018035, year = {2023}, author = {Muzahid, NH and Hussain, MH and Huët, MAL and Dwiyanto, J and Su, TT and Reidpath, D and Mustapha, F and Ayub, Q and Tan, HS and Rahman, S}, title = {Molecular characterization and comparative genomic analysis of Acinetobacter baumannii isolated from the community and the hospital: an epidemiological study in Segamat, Malaysia.}, journal = {Microbial genomics}, volume = {9}, number = {4}, pages = {}, pmid = {37018035}, issn = {2057-5858}, mesh = {Humans ; *Acinetobacter baumannii/genetics ; Malaysia ; Phylogeny ; Prospective Studies ; Hospitals ; Genomics ; }, abstract = {Acinetobacter baumannii is a common cause of multidrug-resistant (MDR) nosocomial infections around the world. However, little is known about the persistence and dynamics of A. baumannii in a healthy community. This study investigated the role of the community as a prospective reservoir for A. 
baumannii and explored possible links between hospital and community isolates. A total of 12 independent A. baumannii strains were isolated from human faecal samples from the community in Segamat, Malaysia, in 2018 and 2019. Another 15 were obtained in 2020 from patients at the co-located tertiary public hospital. The antimicrobial resistance profile and biofilm formation ability were analysed, and the relatedness of community and hospital isolates was determined using whole-genome sequencing (WGS). Antibiotic profile analysis revealed that 12 out of 15 hospital isolates were MDR, but none of the community isolates were MDR. However, phylogenetic analysis based on single-nucleotide polymorphisms (SNPs) and a pangenome analysis of core genes showed clustering between four community and two hospital strains. Such clustering of strains from two different settings based on their genomes suggests that these strains could persist in both. WGS revealed 41 potential resistance genes on average in the hospital strains, but fewer (n=32) were detected in the community strains. In contrast, 68 virulence genes were commonly seen in strains from both sources. This study highlights the possible transmission threat to public health posed by virulent A. 
baumannii present in the gut of asymptomatic individuals in the community.}, } @article {pmid37016310, year = {2023}, author = {Commichaux, S and Rand, H and Javkar, K and Molloy, EK and Pettengill, JB and Pightling, A and Hoffmann, M and Pop, M and Jayeola, V and Foley, S and Luo, Y}, title = {Assessment of plasmids for relating the 2020 Salmonella enterica serovar Newport onion outbreak to farms implicated by the outbreak investigation.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {165}, pmid = {37016310}, issn = {1471-2164}, support = {5U01-FD001418//Joint Institute for Food Safety and Applied Nutrition, University of Maryland/ ; }, mesh = {*Salmonella enterica ; Serogroup ; Onions/genetics ; Farms ; Phylogeny ; Plasmids/genetics ; Disease Outbreaks ; }, abstract = {BACKGROUND: The Salmonella enterica serovar Newport red onion outbreak of 2020 was the largest foodborne outbreak of Salmonella in over a decade. The epidemiological investigation suggested two farms as the likely source of contamination. However, single nucleotide polymorphism (SNP) analysis of the whole genome sequencing data showed that none of the Salmonella isolates collected from the farm regions were linked to the clinical isolates-preventing the use of phylogenetics in source identification. Here, we explored an alternative method for analyzing the whole genome sequencing data driven by the hypothesis that if the outbreak strain had come from the farm regions, then the clinical isolates would disproportionately contain plasmids found in isolates from the farm regions due to horizontal transfer.

RESULTS: SNP analysis confirmed that the clinical isolates formed a single, nearly-clonal clade with evidence for ancestry in California going back a decade. The clinical clade had a large core genome (4,399 genes) and a large and sparsely distributed accessory genome (2,577 genes, at least 64% on plasmids). At least 20 plasmid types occurred in the clinical clade, more than were found in the literature for Salmonella Newport. A small number of plasmids, 14 from 13 clinical isolates and 17 from 8 farm isolates, were found to be highly similar (> 95% identical)-indicating they might be related by horizontal transfer. Phylogenetic analysis was unable to determine the geographic origin, isolation source, or time of transfer of the plasmids, likely due to their promiscuous and transient nature. However, our resampling analysis suggested that observing a similar number and combination of highly similar plasmids in random samples of environmental Salmonella enterica within the NCBI Pathogen Detection database was unlikely, supporting a connection between the outbreak strain and the farms implicated by the epidemiological investigation.

CONCLUSION: Horizontally transferred plasmids provided evidence for a connection between clinical isolates and the farms implicated as the source of the outbreak. Our case study suggests that such analyses might add a new dimension to source tracking investigations, but highlights the need for detailed and accurate metadata, more extensive environmental sampling, and a better understanding of plasmid molecular evolution.}, } @article {pmid37016094, year = {2023}, author = {Li, W and Wang, D and Hong, X and Shi, J and Hong, J and Su, S and Loaiciga, CR and Li, J and Liang, W and Shi, J and Zhang, D}, title = {Identification and validation of new MADS-box homologous genes in 3010 rice pan-genome.}, journal = {Plant cell reports}, volume = {42}, number = {6}, pages = {975--988}, pmid = {37016094}, issn = {1432-203X}, support = {B21HJ8104//Yazhou Bay Seed Laboratory Project/ ; B14016//111 Project/ ; }, mesh = {*Genome, Plant/genetics ; *Oryza/genetics/metabolism ; MADS Domain Proteins/genetics/metabolism ; Phylogeny ; Plant Breeding ; Gene Expression Regulation, Plant/genetics ; }, abstract = {Identification and validation of ten new MADS-box homologous genes in 3010 rice pan-genome for rice breeding. The functional genome is significant for rice breeding. MADS-box genes encode transcription factors that are indispensable for rice growth and development. The reported 15,362 novel genes in the rice pan-genome (RPAN) of Asian cultivated rice accessions provided a useful gene reservoir for the identification of more MADS-box candidates to overcome the limitation for the usage of only 75 MADS-box genes identified in Nipponbare for rice breeding. Here, we report the identification and validation of ten MADS-box homologous genes in RPAN. Origin and identity analysis indicated that they are originated from different wild rice accessions and structure of motif analysis revealed high variations in their amino acid sequences. 
Phylogenetic results with 277 MADS-box genes in 41 species showed that all these ten MADS-box homologous genes belong to type I (SRF-like, M-type). Gene expression analysis confirmed the existence of these ten MADS-box genes in IRIS_313-10,394, all of them were expressed in flower tissues, and six of them were highly expressed during seed development. Altogether, we identified and validated experimentally, for the first time, ten novel MADS-box genes in RPAN, which provides new genetic sources for rice improvement.}, } @article {pmid37012375, year = {2023}, author = {von Meijenfeldt, FAB and Hogeweg, P and Dutilh, BE}, title = {A social niche breadth score reveals niche range strategies of generalists and specialists.}, journal = {Nature ecology \& evolution}, volume = {7}, number = {5}, pages = {768--781}, pmid = {37012375}, issn = {2397-334X}, mesh = {*Ecosystem ; *Ecology ; Biological Evolution ; }, abstract = {Generalists can survive in many environments, whereas specialists are restricted to a single environment. Although a classical concept in ecology, niche breadth has remained challenging to quantify for microorganisms because it depends on an objective definition of the environment. Here, by defining the environment of a microorganism as the community it resides in, we integrated information from over 22,000 environmental sequencing samples to derive a quantitative measure of the niche, which we call social niche breadth. At the level of genera, we explored niche range strategies throughout the prokaryotic tree of life. We found that social generalists include opportunists that stochastically dominate local communities, whereas social specialists are stable but low in abundance. Social generalists have a more diverse and open pan-genome than social specialists, but we found no global correlation between social niche breadth and genome size. 
Instead, we observed two distinct evolutionary strategies, whereby specialists have relatively small genomes in habitats with low local diversity, but relatively large genomes in habitats with high local diversity. Together, our analysis shines data-driven light on microbial niche range strategies.}, } @article {pmid37010293, year = {2023}, author = {Maranga, M and Szczerbiak, P and Bezshapkin, V and Gligorijevic, V and Chandler, C and Bonneau, R and Xavier, RJ and Vatanen, T and Kosciolek, T}, title = {Comprehensive Functional Annotation of Metagenomes and Microbial Genomes Using a Deep Learning-Based Method.}, journal = {mSystems}, volume = {8}, number = {2}, pages = {e0117822}, pmid = {37010293}, issn = {2379-5077}, mesh = {Humans ; Metagenome/genetics ; *Deep Learning ; Molecular Sequence Annotation ; *Microbiota/genetics ; Genome, Microbial ; }, abstract = {Comprehensive protein function annotation is essential for understanding microbiome-related disease mechanisms in the host organisms. However, a large portion of human gut microbial proteins lack functional annotation. Here, we have developed a new metagenome analysis workflow integrating de novo genome reconstruction, taxonomic profiling, and deep learning-based functional annotations from DeepFRI. This is the first approach to apply deep learning-based functional annotations in metagenomics. We validate DeepFRI functional annotations by comparing them to orthology-based annotations from eggNOG on a set of 1,070 infant metagenomes from the DIABIMMUNE cohort. Using this workflow, we generated a sequence catalogue of 1.9 million nonredundant microbial genes. The functional annotations revealed 70% concordance between Gene Ontology annotations predicted by DeepFRI and eggNOG. DeepFRI improved the annotation coverage, with 99% of the gene catalogue obtaining Gene Ontology molecular function annotations, although they are less specific than those from eggNOG. 
Additionally, we constructed pangenomes in a reference-free manner using high-quality metagenome-assembled genomes (MAGs) and analyzed the associated annotations. eggNOG annotated more genes on well-studied organisms, such as Escherichia coli, while DeepFRI was less sensitive to taxa. Further, we show that DeepFRI provides additional annotations in comparison to the previous DIABIMMUNE studies. This workflow will contribute to novel understanding of the functional signature of the human gut microbiome in health and disease as well as guiding future metagenomics studies. IMPORTANCE The past decade has seen advancement in high-throughput sequencing technologies resulting in rapid accumulation of genomic data from microbial communities. While this growth in sequence data and gene discovery is impressive, the majority of microbial gene functions remain uncharacterized. The coverage of functional information coming from either experimental sources or inferences is low. To solve these challenges, we have developed a new workflow to computationally assemble microbial genomes and annotate the genes using a deep learning-based model DeepFRI. This improved microbial gene annotation coverage to 1.9 million metagenome-assembled genes, representing 99% of the assembled genes, which is a significant improvement compared to 12% Gene Ontology term annotation coverage by commonly used orthology-based approaches. Importantly, the workflow supports pangenome reconstruction in a reference-free manner, allowing us to analyze the functional potential of individual bacterial species. 
We therefore propose this alternative approach combining deep-learning functional predictions with the commonly used orthology-based annotations as one that could help us uncover novel functions observed in metagenomic microbiome studies.}, } @article {pmid37007277, year = {2023}, author = {Heng, E and Tan, LL and Tay, DWP and Lim, YH and Yang, LK and Seow, DCS and Leong, CY and Ng, V and Ng, SB and Kanagasundaram, Y and Wong, FT and Koduru, L}, title = {Cost-effective hybrid long-short read assembly delineates alternative GC-rich Streptomyces hosts for natural product discovery.}, journal = {Synthetic and systems biotechnology}, volume = {8}, number = {2}, pages = {253--261}, pmid = {37007277}, issn = {2405-805X}, abstract = {With the advent of rapid automated in silico identification of biosynthetic gene clusters (BGCs), genomics presents vast opportunities to accelerate natural product (NP) discovery. However, prolific NP producers, Streptomyces, are exceptionally GC-rich (>80%) and highly repetitive within BGCs. These pose challenges in sequencing and high-quality genome assembly which are currently circumvented via intensive sequencing. Here, we outline a more cost-effective workflow using multiplex Illumina and Oxford Nanopore sequencing with hybrid long-short read assembly algorithms to generate high quality genomes. Our protocol involves subjecting long read-derived assemblies to up to 4 rounds of polishing with short reads to yield accurate BGC predictions. We successfully sequenced and assembled 8 GC-rich Streptomyces genomes whose lengths range from 7.1 to 12.1 Mb with a median N50 of 8.2 Mb. Taxonomic analysis revealed previous misrepresentation among these strains and allowed us to propose a potentially new species, Streptomyces sydneybrenneri. 
Further comprehensive characterization of their biosynthetic, pan-genomic and antibiotic resistance features especially for molecules derived from type I polyketide synthase (PKS) BGCs reflected their potential as alternative NP hosts. Thus, the genome assemblies and insights presented here are envisioned to serve as gateway for the scientific community to expand their avenues in NP discovery.}, } @article {pmid37003962, year = {2023}, author = {Raza, Q and Rashid, MAR and Waqas, M and Ali, Z and Rana, IA and Khan, SH and Khan, IA and Atif, RM}, title = {Genomic diversity of aquaporins across genus Oryza provides a rich genetic resource for development of climate resilient rice cultivars.}, journal = {BMC plant biology}, volume = {23}, number = {1}, pages = {172}, pmid = {37003962}, issn = {1471-2229}, mesh = {*Oryza/metabolism ; Genomics ; Stress, Physiological/genetics ; Promoter Regions, Genetic ; *Aquaporins/genetics/metabolism ; Plant Proteins/metabolism ; Gene Expression Regulation, Plant ; Phylogeny ; }, abstract = {BACKGROUND: Plant aquaporins are critical genetic players performing multiple biological functions, especially climate resilience and water-use efficiency. Their genomic diversity across genus Oryza is yet to be explored.

RESULTS: This study identified 369 aquaporin-encoding genes from 11 cultivated and wild rice species and further categorized these into four major subfamilies, among which small basic intrinsic proteins are speculated to be ancestral to all land plant aquaporins. Evolutionarily conserved motifs in peptides of aquaporins participate in transmembrane transport of materials and their relatively complex gene structures provide an evolutionary playground for regulation of genome structure and transcription. Duplication and evolution analyses revealed higher genetic conservation among Oryza aquaporins and strong purifying selections are assisting in conserving the climate resilience associated functions. Promoter analysis highlighted enrichment of gene upstream regions with cis-acting regulatory elements involved in diverse biological processes, whereas miRNA target site prediction analysis unveiled substantial involvement of osa-miR2102-3p, osa-miR2927 and osa-miR5075 in post-transcriptional regulation of gene expression patterns. Moreover, expression patterns of japonica aquaporins were significantly perturbed in response to different treatment levels of six phytohormones and four abiotic stresses, suggesting their multifarious roles in plants survival under stressed environments. Furthermore, superior haplotypes of seven conserved orthologous aquaporins for higher thousand-grain weight are reported from a gold mine of 3,010 sequenced rice pangenomes.

CONCLUSIONS: This study unveils the complete genomic atlas of aquaporins across genus Oryza and provides a comprehensive genetic resource for genomics-assisted development of climate-resilient rice cultivars.}, } @article {pmid37000493, year = {2023}, author = {Pagnossin, D and Weir, W and Smith, A and Fuentes, M and Coelho, J and Oravcova, K}, title = {Streptococcus canis genomic epidemiology reveals the potential for zoonotic transfer.}, journal = {Microbial genomics}, volume = {9}, number = {3}, pages = {}, pmid = {37000493}, issn = {2057-5858}, mesh = {Animals ; Humans ; Dogs ; Cats ; Multilocus Sequence Typing ; Phylogeny ; Genome-Wide Association Study ; *Cat Diseases ; *Dog Diseases/epidemiology ; Genomics ; Anti-Bacterial Agents/pharmacology ; }, abstract = {Streptococcus canis, a multi-host pathogen commonly isolated from dogs and cats, has been occasionally reported in severe cases of human infection. To address the gap in knowledge on its virulence and host tropism, we investigated S. canis genomic epidemiology and report the results of this analysis for the first time. We analysed 59 S. canis whole genome sequences originating from a variety of host species, comprising 39 newly sequenced isolates from UK sources, along with all (n=20) publicly available genomes. Antimicrobial resistance (AMR) phenotype was determined for all 39 available isolates. Genomes were screened for determinants of resistance and virulence. We created a core SNP phylogeny and compared strain clustering to multi-locus sequence typing (MLST) and S. canis M-like protein (SCM) typing. We investigated the dataset for signals of host adaptation using phylogenetic analysis, accessory genome clustering and pan-genome-wide association study analysis. A total of 23 % (9/39) of isolates exhibited phenotypic resistance to lincosamides, macrolides and/or tetracyclines. 
This was complemented by the identification of AMR-encoding genes in all genomes: tetracycline (tetO 14 %, 8/59; and tetM 7 %, 4/59) and lincosamide/macrolide (ermB, 7 %, 4/59). AMR was more common in human (36 %, 4/11) compared to companion animal (18 %, 5/28) isolates. We identified 19 virulence gene homologues, 14 of which were present in all strains analysed. In an S. canis strain isolated from a dog with otitis externa we identified a homologue of S. pyogenes superantigen SMEZ. The MLST and SCM typing schemes were found to be incapable of accurately representing core SNP-based genomic diversity of the S. canis population. No evidence of host adaptation was detected, suggesting the potential for inter-species transmission, including zoonotic transfer.}, } @article {pmid36993855, year = {2023}, author = {Akparov, Z and Hajiyeva, S and Abbasov, M and Kaur, S and Hamwieh, A and Alsamman, AM and Hajiyev, E and Babayeva, S and Izzatullayeva, V and Mustafayeva, Z and Mehdiyeva, S and Mustafayev, O and Shahmuradov, I and Kosarev, P and Solovyev, V and Salamov, A and Jighly, A}, title = {Two major chromosome evolution events with unrivaled conserved gene content in pomegranate.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1039211}, pmid = {36993855}, issn = {1664-462X}, abstract = {Pomegranate has a unique evolutionary history given that different cultivars have eight or nine bivalent chromosomes with possible crossability between the two classes. Therefore, it is important to study chromosome evolution in pomegranate to understand the dynamics of its population. Here, we de novo assembled the Azerbaijani cultivar "Azerbaijan guloyshasi" (AG2017; 2n = 16) and re-sequenced six cultivars to track the evolution of pomegranate and to compare it with previously published de novo assembled and re-sequenced cultivars. 
High synteny was observed between AG2017, Bhagawa (2n = 16), Tunisia (2n = 16), and Dabenzi (2n = 18), but these four cultivars diverged from the cultivar Taishanhong (2n = 18) with several rearrangements indicating the presence of two major chromosome evolution events. Major presence/absence variations were not observed as >99% of the five genomes aligned across the cultivars, while >99% of the pan-genic content was represented by Tunisia and Taishanhong only. We also revisited the divergence between soft- and hard-seeded cultivars with less structured population genomic data, compared to previous studies, to refine the selected genomic regions and detect global migration routes for pomegranate. We reported a unique admixture between soft- and hard-seeded cultivars that can be exploited to improve the diversity, quality, and adaptability of local pomegranate varieties around the world. Our study adds body knowledge to understanding the evolution of the pomegranate genome and its implications for the population structure of global pomegranate diversity, as well as planning breeding programs aiming to develop improved cultivars.}, } @article {pmid36993842, year = {2023}, author = {Carballo, J and Bellido, AM and Selva, JP and Zappacosta, D and Gallo, CA and Albertini, E and Caccamo, M and Echenique, V}, title = {From tetraploid to diploid, a pangenomic approach to identify genes lost during synthetic diploidization of Eragrostis curvula.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1133986}, pmid = {36993842}, issn = {1664-462X}, abstract = {INTRODUCTION: In Eragrostis curvula, commonly known as weeping lovegrass, a synthetic diploidization event of the facultative apomictic tetraploid Tanganyika INTA cv. originated from the sexual diploid Victoria cv. Apomixis is an asexual reproduction by seeds in which the progeny is genetically identical to the maternal plant.

METHODS: To assess the genomic changes related to ploidy and to the reproductive mode occurring during diploidization, a mapping approach was followed to obtain the first E. curvula pangenome assembly. In this way, gDNA of Tanganyika INTA was extracted and sequenced in 2x250 Illumina pair-end reads and mapped against the Victoria genome assembly. The unmapped reads were used for variant calling, while the mapped reads were assembled using Masurca software.

RESULTS: The length of the assembly was 28,982,419 bp distributed in 18,032 contigs, and the variable genes annotated in these contigs rendered 3,952 gene models. Functional annotation of the genes showed that the reproductive pathway was differentially enriched. PCR amplification in gDNA and cDNA of Tanganyika INTA and Victoria was conducted to validate the presence/absence variation in five genes related to reproduction and ploidy. The polyploid nature of the Tanganyika INTA genome was also evaluated through the variant calling analysis showing the single nucleotide polymorphism (SNP) coverage and allele frequency distribution with a segmental allotetraploid pairing behavior.

DISCUSSION: The results presented here suggest that the genes were lost in Tanganyika INTA during the diploidization process that was conducted to suppress the apomictic pathway, affecting severely the fertility of Victoria cv.}, } @article {pmid36991151, year = {2023}, author = {Zhen, C and Chen, XK and Ge, XF and Liu, WZ}, title = {Streptomonospora mangrovi sp. nov., isolated from mangrove soil showing similar metabolic capabilities, but distinct secondary metabolites profiles.}, journal = {Archives of microbiology}, volume = {205}, number = {4}, pages = {148}, pmid = {36991151}, issn = {1432-072X}, support = {32202121//National Natural Science Foundation of China/ ; }, mesh = {Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Soil ; Fatty Acids/analysis ; DNA, Bacterial/genetics ; Soil Microbiology ; Bacterial Typing Techniques ; Diaminopimelic Acid/analysis ; Sequence Analysis, DNA ; *Actinomycetales/genetics ; }, abstract = {A novel actinomycete, designated strain S1-112[T], was isolated from a mangrove soil sample from Hainan, China, and characterized using a polyphasic approach. Strain S1-112[T] showed the highest similarity of the 16S rRNA gene to Streptomonospora nanhaiensis 12A09[T] (99.24%). Their close relationship was further supported by phylogenetic analyses, which placed these two strains within a stable clade. The highest values of digital DNA-DNA hybridization (dDDH, 41.4%) and average nucleotide identity (ANI, 90.55%) were detected between strain S1-112[T] and Streptomonospora halotolerans NEAU-Jh2-17[T]. Genotypic and phenotypic characteristics demonstrated that strain S1-112[T] could be distinguished from its closely related relatives. We also profiled the pan-genome and metabolic features of genomic assemblies of strains belonging to the genus Streptomonospora, indicating similar functional capacities and metabolic activities. However, all of these strains showed promising potential for producing diverse types of secondary metabolites. 
In conclusion, strain S1-112[T] represents a novel species of the genus Streptomonospora, for which the name Streptomonospora mangrovi sp. nov. was proposed. The type strain is S1-112[T] (= JCM 34292[T]).}, } @article {pmid36982787, year = {2023}, author = {Karetnikov, DI and Vasiliev, GV and Toshchakov, SV and Shmakov, NA and Genaev, MA and Nesterov, MA and Ibragimova, SM and Rybakov, DA and Gavrilenko, TA and Salina, EA and Patrushev, MV and Kochetov, AV and Afonnikov, DA}, title = {Analysis of Genome Structure and Its Variations in Potato Cultivars Grown in Russia.}, journal = {International journal of molecular sciences}, volume = {24}, number = {6}, pages = {}, pmid = {36982787}, issn = {1422-0067}, support = {075-15-2019-1662//The Ministry of Education and Science of the Russian Federation/ ; }, mesh = {*Solanum tuberosum/genetics ; DNA Copy Number Variations ; Genome, Plant ; Genomics ; Tetraploidy ; }, abstract = {Solanum tuberosum L. (common potato) is one of the most important crops produced almost all over the world. Genomic sequences of potato opens the way for studying the molecular variations related to diversification. We performed a reconstruction of genomic sequences for 15 tetraploid potato cultivars grown in Russia using short reads. Protein-coding genes were identified; conserved and variable parts of pan-genome and the repertoire of the NBS-LRR genes were characterized. For comparison, we used additional genomic sequences for twelve South American potato accessions, performed analysis of genetic diversity, and identified the copy number variations (CNVs) in two these groups of potato. Genomes of Russian potato cultivars were more homogeneous by CNV characteristics and have smaller maximum deletion size in comparison with South American ones. Genes with different CNV occurrences in two these groups of potato accessions were identified. 
We revealed genes of immune/abiotic stress response, transport and five genes related to tuberization and photoperiod control among them. Four genes related to tuberization and photoperiod were investigated in potatoes previously (phytochrome A among them). A novel gene, homologous to the poly(ADP-ribose) glycohydrolase (PARG) of Arabidopsis, was identified that may be involved in circadian rhythm control and contribute to the acclimatization processes of Russian potato cultivars.}, } @article {pmid36981047, year = {2023}, author = {Wartha, S and Bretschneider, N and Dangel, A and Hobmaier, B and Hörmansdorfer, S and Huber, I and Murr, L and Pavlovic, M and Sprenger, A and Wenning, M and Alter, T and Messelhäußer, U}, title = {Genetic Characterization of Listeria from Food of Non-Animal Origin Products and from Producing and Processing Companies in Bavaria, Germany.}, journal = {Foods (Basel, Switzerland)}, volume = {12}, number = {6}, pages = {}, pmid = {36981047}, issn = {2304-8158}, abstract = {Reported cases of listeriosis from food of non-animal origin (FNAO) are increasing. In order to assess the risk of exposure to Listeria monocytogenes from FNAO, the genetic characterization of the pathogen in FNAO products and in primary production and processing plants needs to be investigated. For this, 123 samples of fresh and frozen soft fruit and 407 samples of 39 plants in Bavaria, Germany that produce and process FNAO were investigated for Listeria contamination. As a result, 64 Listeria spp. isolates were detected using ISO 11290-1:2017. Environmental swabs and water and food samples were investigated. L. seeligeri (36/64, 56.25%) was the most frequently identified species, followed by L. monocytogenes (8/64, 12.50%), L. innocua (8/64, 12.50%), L. ivanovii (6/64, 9.38%), L. newyorkensis (5/64, 7.81%), and L. grayi (1/64, 1.56%). 
Those isolates were subsequently sequenced by whole-genome sequencing and subjected to pangenome analysis to retrieve data on the genotype, serotype, antimicrobial resistance (AMR), and virulence markers. Eight out of sixty-four Listeria spp. isolates were identified as L. monocytogenes. The serogroup analysis detected that 62.5% of the L. monocytogenes isolates belonged to serogroup IIa (1/2a and 3a) and 37.5% to serogroup IVb (4b, 4d, and 4e). Furthermore, the MLST (multilocus sequence typing) analysis of the eight detected L. monocytogenes isolates identified seven different sequence types (STs) and clonal complexes (CCs), i.e., ST1/CC1, ST2/CC2, ST6/CC6, ST7/CC7, ST21/CC21, ST504/CC475, and ST1413/CC739. The core genome MLST analysis also showed high allelic differences and suggests plant-specific isolates. Regarding the AMR, we detected phenotypic resistance against benzylpenicillin, fosfomycin, and moxifloxacin in all eight L. monocytogenes isolates. Moreover, virulence factors, such as prfA, hly, plcA, plcB, hpt, actA, inlA, inlB, and mpl, were identified in pathogenic and nonpathogenic Listeria species. The significance of L. monocytogenes in FNAO is growing and should receive increasing levels of attention.}, } @article {pmid36980919, year = {2023}, author = {Weltzer, ML and Wall, D}, title = {Social Diversification Driven by Mobile Genetic Elements.}, journal = {Genes}, volume = {14}, number = {3}, pages = {}, pmid = {36980919}, issn = {2073-4425}, support = {R35 GM140886/GM/NIGMS NIH HHS/United States ; }, mesh = {*Bacteria/genetics ; *Myxococcales/genetics ; Biological Evolution ; Genome ; Interspersed Repetitive Sequences/genetics ; }, abstract = {Social diversification in microbes is an evolutionary process where lineages bifurcate into distinct populations that cooperate with themselves but not with other groups. In bacteria, this is frequently driven by horizontal transfer of mobile genetic elements (MGEs). 
Here, the resulting acquisition of new genes changes the recipient's social traits and consequently how they interact with kin. These changes include discriminating behaviors mediated by newly acquired effectors. Since the producing cell is protected by cognate immunity factors, these selfish elements benefit from selective discrimination against recent ancestors, thus facilitating their proliferation and benefiting the host. Whether social diversification benefits the population at large is less obvious. The widespread use of next-generation sequencing has recently provided new insights into population dynamics in natural habitats and the roles MGEs play. MGEs belong to accessory genomes, which often constitute the majority of the pangenome of a taxon, and contain most of the kin-discriminating loci that fuel rapid social diversification. We further discuss mechanisms of diversification and its consequences to populations and conclude with a case study involving myxobacteria.}, } @article {pmid36979037, year = {2023}, author = {Sedeek, AM and Salah, I and Kamel, HL and Soltan, MA and Nour, E and Alshammari, A and Riaz Rajoka, MS and Elsayed, TR}, title = {Genome-Based Analysis of the Potential Bioactivity of the Terrestrial Streptomyces vinaceusdrappus Strain AC-40.}, journal = {Biology}, volume = {12}, number = {3}, pages = {}, pmid = {36979037}, issn = {2079-7737}, abstract = {Streptomyces are factories of antimicrobial secondary metabolites. We isolated a Streptomyces species associated with the Pelargonium graveolens rhizosphere. Its total metabolic extract exhibited potent antibacterial and antifungal properties against all the tested pathogenic microbes. Whole genome sequencing and genome analyses were performed to take a look at its main characteristics and to reconstruct the metabolic pathways that can be associated with biotechnologically useful traits. AntiSMASH was used to identify the secondary metabolite gene clusters. 
In addition, we searched for known genes associated with plant growth-promoting characteristics. Finally, a comparative and pan-genome analysis with three closely related genomes was conducted. It was identified as Streptomyces vinaceusdrappus strain AC-40. Genome mining indicated the presence of several secondary metabolite gene clusters. Some of them are identical or homologs to gene clusters of known metabolites with antimicrobial, antioxidant, and other bioactivities. It also showed the presence of several genes related to plant growth promotion traits. The comparative genome analysis indicated that at least five of these gene clusters are highly conserved through rochei group genomes. The genotypic and phenotypic characteristics of S. vinaceusdrappus strain AC-40 indicate that it is a promising source of beneficial secondary metabolites with pharmaceutical and biotechnological applications.}, } @article {pmid36975929, year = {2023}, author = {Lu, W and Zhang, T and Zhang, Q and Zhang, N and Jia, L and Ma, S and Xia, Q}, title = {FibH Gene Complete Sequences (FibHome) Revealed Silkworm Pedigree.}, journal = {Insects}, volume = {14}, number = {3}, pages = {}, pmid = {36975929}, issn = {2075-4450}, support = {32122084//National Natural Science Foundation of China/ ; cstc2020jcyj-bshX0092//Chongqing Natural Science Foundation/ ; cstc2021ycjh-bgzxm0005//Chongqing Natural Science Foundation/ ; SWU120012//PhD Start-up Foundation of Southwest University/ ; SWU-KT22042//Fundamental Research Funds for Central Universities/ ; }, abstract = {The highly repetitive and variable fibroin heavy chain (FibH) gene can be used as a silkworm identification; however, only a few complete FibH sequences are known. In this study, we extracted and examined 264 FibH gene complete sequences (FibHome) from a high-resolution silkworm pan-genome. The average FibH lengths of the wild silkworm, local, and improved strains were 19,698 bp, 16,427 bp, and 15,795 bp, respectively. 
All FibH sequences had a conserved 5' and 3' terminal non-repetitive (5' and 3' TNR, 99.74% and 99.99% identity, respectively) sequence and a variable repetitive core (RC). The RCs differed greatly, but they all shared the same motif. During domestication or breeding, the FibH gene mutated with hexanucleotide (GGTGCT) as the core unit. Numerous variations existed that were not unique to wild and domesticated silkworms. However, the transcriptional factor binding sites, such as fibroin modulator-binding protein, were highly conserved and had 100% identity in the FibH gene's intron and upstream sequences. The local and improved strains with the same FibH gene were divided into four families using this gene as a marker. Family I contained a maximum of 62 strains with the optional FibH (Opti-FibH, 15,960 bp) gene. This study provides new insights into FibH variations and silkworm breeding.}, } @article {pmid36969737, year = {2022}, author = {Baaijens, JA and Bonizzoni, P and Boucher, C and Della Vedova, G and Pirola, Y and Rizzi, R and Sirén, J}, title = {Computational graph pangenomics: a tutorial on data structures and their applications.}, journal = {Natural computing}, volume = {21}, number = {1}, pages = {81--108}, pmid = {36969737}, issn = {1567-7818}, support = {R01 AI141810/AI/NIAID NIH HHS/United States ; R01 HG011392/HG/NHGRI NIH HHS/United States ; }, abstract = {Computational pangenomics is an emerging research field that is changing the way computer scientists are facing challenges in biological sequence analysis. In past decades, contributions from combinatorics, stringology, graph theory and data structures were essential in the development of a plethora of software tools for the analysis of the human genome. These tools allowed computational biologists to approach ambitious projects at population scale, such as the 1000 Genomes Project. 
A major contribution of the 1000 Genomes Project is the characterization of a broad spectrum of genetic variations in the human genome, including the discovery of novel variations in the South Asian, African and European populations-thus enhancing the catalogue of variability within the reference genome. Currently, the need to take into account the high variability in population genomes as well as the specificity of an individual genome in a personalized approach to medicine is rapidly pushing the abandonment of the traditional paradigm of using a single reference genome. A graph-based representation of multiple genomes, or a graph pangenome, is replacing the linear reference genome. This means completely rethinking well-established procedures to analyze, store, and access information from genome representations. Properly addressing these challenges is crucial to face the computational tasks of ambitious healthcare projects aiming to characterize human diversity by sequencing 1M individuals (Stark et al. 2019). This tutorial aims to introduce readers to the most recent advances in the theory of data structures for the representation of graph pangenomes. We discuss efficient representations of haplotypes and the variability of genotypes in graph pangenomes, and highlight applications in solving computational problems in human and microbial (viral) pangenomes.}, } @article {pmid36968469, year = {2023}, author = {Rehman, MNU and Dawar, FU and Zeng, J and Fan, L and Feng, W and Wang, M and Yang, N and Guo, G and Zheng, J}, title = {Complete genome sequence analysis of Edwardsiella tarda SC002 from hatchlings of Siamese crocodile.}, journal = {Frontiers in veterinary science}, volume = {10}, number = {}, pages = {1140655}, pmid = {36968469}, issn = {2297-1769}, abstract = {Edwardsiella tarda is a Gram-negative, facultative anaerobic rod-shaped bacterium and the causative agent of the systemic disease "Edwardsiellosis". 
It is commonly prevalent in aquatic organisms with subsequent economic loss and hence has attracted increasing attention from researchers. In this study, we investigated the complete genome sequence of a highly virulent isolate Edwardsiella tarda SC002 isolated from hatchlings of the Siamese crocodile. The genome of SC002 consisted of one circular chromosome of length 3,662,469 bp with a 57.29% G+C content and four novel plasmids. A total of 3,734 protein-coding genes, 12 genomic islands (GIs), 7 prophages, 48 interspersed repeat sequences, 248 tandem repeat sequences, a CRISPR component with a total length of 175 bp, and 171 ncRNAs (tRNA = 106, sRNA = 37, and rRNA = 28) were predicted. In addition, the coding genes of assembled genome were successfully annotated against eight general databases (NR = 3,618/3,734, COG = 2,947/3,734, KEGG = 3,485/3,734, SWISS-PROT = 2,787/3,734, GO = 2,648/3,734, Pfam = 2,648/3,734, CAZy = 130/3,734, and TCDB = 637/3,734) and four pathogenicity-related databases (ARDB = 11/3,734, CARD = 142/3,734, PHI = 538/3,734, and VFDB = 315/3,734). Pan-genome and comparative genome analyses of the complete sequenced genomes confirmed their evolutionary relationships. The present study confirmed that E. tarda SC002 is a potential pathogen bearing a bulk amount of antibiotic resistance, virulence, and pathogenic genes and its open pan-genome may enhance its host range in the future.}, } @article {pmid36968185, year = {2023}, author = {Zhou, H and Yan, F and Hao, F and Ye, H and Yue, M and Woeste, K and Zhao, P and Zhang, S}, title = {Pan-genome and transcriptome analyses provide insights into genomic variation and differential gene expression profiles related to disease resistance and fatty acid biosynthesis in eastern black walnut (Juglans nigra).}, journal = {Horticulture research}, volume = {10}, number = {3}, pages = {uhad015}, pmid = {36968185}, issn = {2662-6810}, abstract = {Walnut (Juglans) species are used as nut crops worldwide. 
Eastern black walnut (EBW, Juglans nigra), a diploid, horticultural important woody species is native to much of eastern North America. Although it is highly valued for its wood and nut, there are few resources for understanding EBW genetics. Here, we present a high-quality genome assembly of J. nigra based on Illumina, Pacbio, and Hi-C technologies. The genome size was 540.8 Mb, with a scaffold N50 size of 35.1 Mb, and 99.0% of the assembly was anchored to 16 chromosomes. Using this genome as a reference, the resequencing of 74 accessions revealed the effective population size of J. nigra declined during the glacial maximum. A single whole-genome duplication event was identified in the J. nigra genome. Large syntenic blocks among J. nigra, Juglans regia, and Juglans microcarpa predominated, but inversions of more than 600 kb were identified. By comparing the EBW genome with those of J. regia and J. microcarpa, we detected InDel sizes of 34.9 Mb in J. regia and 18.3 Mb in J. microcarpa, respectively. Transcriptomic analysis of differentially expressed genes identified five presumed NBS-LRR (NUCLEOTIDE BINDING SITE-LEUCINE-RICH REPEAT) genes were upregulated during the development of walnut husks and shells compared to developing embryos. We also identified candidate genes with essential roles in seed oil synthesis, including FAD (FATTY ACID DESATURASE) and OLE (OLEOSIN). 
Our work advances the understanding of fatty acid bioaccumulation and disease resistance in nut crops, and also provides an essential resource for conducting genomics-enabled breeding in walnut.}, } @article {pmid36966465, year = {2023}, author = {Velt, A and Frommer, B and Blanc, S and Holtgräwe, D and Duchêne, É and Dumas, V and Grimplet, J and Hugueney, P and Kim, C and Lahaye, M and Matus, JT and Navarro-Payá, D and Orduña, L and Tello-Ruiz, MK and Vitulo, N and Ware, D and Rustenholz, C}, title = {An improved reference of the grapevine genome reasserts the origin of the PN40024 highly homozygous genotype.}, journal = {G3 (Bethesda, Md.)}, volume = {13}, number = {5}, pages = {}, pmid = {36966465}, issn = {2160-1836}, support = {P30 CA045508/CA/NCI NIH HHS/United States ; }, mesh = {*Genome, Plant ; Genotype ; Chromosome Mapping ; Base Sequence ; Molecular Sequence Annotation ; *Vitis/genetics ; }, abstract = {The genome sequence of the diploid and highly homozygous Vitis vinifera genotype PN40024 serves as the reference for many grapevine studies. Despite several improvements to the PN40024 genome assembly, its current version PN12X.v2 is quite fragmented and only represents the haploid state of the genome with mixed haplotypes. In fact, being nearly homozygous, this genome contains several heterozygous regions that are yet to be resolved. Taking the opportunity of improvements that long-read sequencing technologies offer to fully discriminate haplotype sequences, an improved version of the reference, called PN40024.v4, was generated. Through incorporating long genomic sequencing reads to the assembly, the continuity of the 12X.v2 scaffolds was highly increased with a total number decreasing from 2,059 to 640 and a reduction in N bases of 88%. Additionally, the full alternative haplotype sequence was built for the first time, the chromosome anchoring was improved and the number of unplaced scaffolds was reduced by half. 
To obtain a high-quality gene annotation that outperforms previous versions, a liftover approach was complemented with an optimized annotation workflow for Vitis. Integration of the gene reference catalogue and its manual curation have also assisted in improving the annotation, while defining the most reliable estimation of 35,230 genes to date. Finally, we demonstrated that PN40024 resulted from 9 selfings of cv. "Helfensteiner" (cross of cv. "Pinot noir" and "Schiava grossa") instead of a single "Pinot noir". These advances will help maintain the PN40024 genome as a gold-standard reference, also contributing toward the eventual elaboration of the grapevine pangenome.}, } @article {pmid36966359, year = {2023}, author = {Yu, Z and Chen, Y and Zhou, Y and Zhang, Y and Li, M and Ouyang, Y and Chebotarov, D and Mauleon, R and Zhao, H and Xie, W and McNally, KL and Wing, RA and Guo, W and Zhang, J}, title = {Rice Gene Index: A comprehensive pan-genome database for comparative and functional genomics of Asian rice.}, journal = {Molecular plant}, volume = {16}, number = {5}, pages = {798--801}, doi = {10.1016/j.molp.2023.03.012}, pmid = {36966359}, issn = {1752-9867}, mesh = {*Oryza/genetics ; Genomics ; Genome, Plant/genetics ; }, } @article {pmid36961900, year = {2023}, author = {Rubio, A and Sprang, M and Garzón, A and Moreno-Rodriguez, A and Pachón-Ibáñez, ME and Pachón, J and Andrade-Navarro, MA and Pérez-Pulido, AJ}, title = {Analysis of bacterial pangenomes reduces CRISPR dark matter and reveals strong association between membranome and CRISPR-Cas systems.}, journal = {Science advances}, volume = {9}, number = {12}, pages = {eadd8911}, pmid = {36961900}, issn = {2375-2548}, mesh = {*CRISPR-Cas Systems/genetics ; Bacteria/genetics ; Genome, Bacterial ; *Bacteriophages/genetics ; }, abstract = {CRISPR-Cas systems are prokaryotic acquired immunity mechanisms, which are found in 40% of bacterial genomes. 
They prevent viral infections through small DNA fragments called spacers. However, the vast majority of these spacers have not yet been associated with the virus they recognize, and it has been named CRISPR dark matter. By analyzing the spacers of tens of thousands of genomes from six bacterial species, we have been able to reduce the CRISPR dark matter from 80% to as low as 15% in some of the species. In addition, we have observed that, when a genome presents CRISPR-Cas systems, this is accompanied by particular sets of membrane proteins. Our results suggest that when bacteria present membrane proteins that make it compete better in its environment and these proteins are, in turn, receptors for specific phages, they would be forced to acquire CRISPR-Cas.}, } @article {pmid36961866, year = {2023}, author = {Matlock, W and Lipworth, S and Chau, KK and AbuOun, M and Barker, L and Kavanagh, J and Andersson, M and Oakley, S and Morgan, M and Crook, DW and Read, DS and Anjum, M and Shaw, LP and Stoesser, N and others}, title = {Enterobacterales plasmid sharing amongst human bloodstream infections, livestock, wastewater, and waterway niches in Oxfordshire, UK.}, journal = {eLife}, volume = {12}, number = {}, pages = {}, doi = {10.7554/eLife.85302}, pmid = {36961866}, issn = {2050-084X}, support = {MRF_MRF-145-0004-TPG-AVISO/MRF/MRF/United Kingdom ; MRF-145-0004-TPG-AVISO/MRF/MRF/United Kingdom ; }, abstract = {Plasmids enable the dissemination of antimicrobial resistance (AMR) in common Enterobacterales pathogens, representing a major public health challenge. However, the extent of plasmid sharing and evolution between Enterobacterales causing human infections and other niches remains unclear, including the emergence of resistance plasmids. Dense, unselected sampling is highly relevant to developing our understanding of plasmid epidemiology and designing appropriate interventions to limit the emergence and dissemination of plasmid-associated AMR. 
We established a geographically and temporally restricted collection of human bloodstream infection (BSI)-associated, livestock-associated (cattle, pig, poultry, and sheep faeces, farm soils) and wastewater treatment work (WwTW)-associated (influent, effluent, waterways upstream/downstream of effluent outlets) Enterobacterales. Isolates were collected between 2008-2020 from sites <60km apart in Oxfordshire, UK. Pangenome analysis of plasmid clusters revealed shared 'backbones', with phylogenies suggesting an intertwined ecology where well-conserved plasmid backbones carry diverse accessory functions, including AMR genes. Many plasmid 'backbones' were seen across species and niches, raising the possibility that plasmid movement between these followed by rapid accessory gene change could be relatively common. Overall, the signature of identical plasmid sharing is likely to be a highly transient one, implying that plasmid movement might be occurring at greater rates than previously estimated, raising a challenge for future genomic One Health studies.}, } @article {pmid36961505, year = {2023}, author = {Delgado-Blas, JF and Ovejero, CM and David, S and Serna, C and Pulido-Vadillo, M and Montero, N and Aanensen, DM and Abadia-Patiño, L and Gonzalez-Zorn, B}, title = {Global scenario of the RmtE pan-aminoglycoside-resistance mechanism: emergence of the rmtE4 gene in South America associated with a hospital-related IncL plasmid.}, journal = {Microbial genomics}, volume = {9}, number = {3}, pages = {}, pmid = {36961505}, issn = {2057-5858}, mesh = {Aminoglycosides/pharmacology ; Plasmids/genetics ; Hospitals ; Animals ; Venezuela ; *Klebsiella/isolation & purification ; Anti-Bacterial Agents/pharmacology ; Bacterial Proteins/genetics ; Phylogeny ; }, abstract = {Antimicrobial resistance (AMR) mechanisms, especially those conferring resistance to critically important antibiotics, are a great concern for public health. 
16S rRNA methyltransferases (16S-RMTases) abolish the effectiveness of most clinically used aminoglycosides, but some of them are considered sporadic, such as RmtE. The main goals of this work were the genomic analysis of bacteria producing 16S-RMTases from a 'One Health' perspective in Venezuela, and the study of the epidemiological and evolutionary scenario of RmtE variants and their related mobile genetic elements (MGEs) worldwide. A total of 21 samples were collected in 2014 from different animal and environmental sources in the Cumaná region (Venezuela). Highly aminoglycoside-resistant Enterobacteriaceae isolates were selected, identified and screened for 16S-RMTase genes. Illumina and Nanopore whole-genome sequencing data were combined to obtain hybrid assemblies and analyse their sequence type, resistome, plasmidome and pan-genome. Genomic collections of rmtE variants and their associated MGEs were generated to perform epidemiological and phylogenetic analyses. A single 16S-RMTase, the novel RmtE4, was identified in five Klebsiella isolates from wastewater samples of Cumaná. This variant possessed three amino acid modifications with respect to RmtE1-3 (Asn152Asp, Val216Ile and Lys267Ile), representing the most genetic distant among all known and novel variants described in this work, and the second most prevalent. rmtE variants were globally spread, and their geographical distribution was determined by the associated MGEs and the carrying bacterial species. Thus, rmtE4 was found to be confined to Klebsiella isolates from South America, where it was closely related to ISVsa3 and an uncommon IncL plasmid related with hospital environments. This work uncovered the global scenario of RmtE and the existence of RmtE4, which could potentially emerge from South America. 
Surveillance and control measures should be developed based on these findings in order to prevent the dissemination of this AMR mechanism and preserve public health worldwide.}, } @article {pmid36958270, year = {2023}, author = {Botelho, J and Tüffers, L and Fuss, J and Buchholz, F and Utpatel, C and Klockgether, J and Niemann, S and Tümmler, B and Schulenburg, H}, title = {Phylogroup-specific variation shapes the clustering of antimicrobial resistance genes and defence systems across regions of genome plasticity in Pseudomonas aeruginosa.}, journal = {EBioMedicine}, volume = {90}, number = {}, pages = {104532}, pmid = {36958270}, issn = {2352-3964}, mesh = {Humans ; *Pseudomonas aeruginosa/genetics ; *Genome, Bacterial ; Anti-Bacterial Agents ; Drug Resistance, Bacterial/genetics ; Phylogeny ; Cluster Analysis ; }, abstract = {BACKGROUND: Pseudomonas aeruginosa is an opportunistic pathogen consisting of three phylogroups (hereafter named A, B, and C). Here, we assessed phylogroup-specific evolutionary dynamics across available and also new P. aeruginosa genomes.

METHODS: In this genomic analysis, we first generated new genome assemblies for 18 strains of the major P. aeruginosa clone type (mPact) panel, comprising a phylogenetically diverse collection of clinical and environmental isolates for this species. Thereafter, we combined these new genomes with 1991 publicly available P. aeruginosa genomes for a phylogenomic and comparative analysis. We specifically explored to what extent antimicrobial resistance (AMR) genes, defence systems, and virulence genes vary in their distribution across regions of genome plasticity (RGPs) and "masked" (RGP-free) genomes, and to what extent this variation differs among the phylogroups.

FINDINGS: We found that members of phylogroup B possess larger genomes, contribute a comparatively larger number of pangenome families, and show lower abundance of CRISPR-Cas systems. Furthermore, AMR and defence systems are pervasive in RGPs and integrative and conjugative/mobilizable elements (ICEs/IMEs) from phylogroups A and B, and the abundance of these cargo genes is often significantly correlated. Moreover, inter- and intra-phylogroup interactions occur at the accessory genome level, suggesting frequent recombination events. Finally, we provide here the mPact panel of diverse P. aeruginosa strains that may serve as a valuable reference for functional analyses.

INTERPRETATION: Altogether, our results highlight distinct pangenome characteristics of the P. aeruginosa phylogroups, which are possibly influenced by variation in the abundance of CRISPR-Cas systems and are shaped by the differential distribution of other defence systems and AMR genes.

FUNDING: German Science Foundation, Max-Planck Society, Leibniz ScienceCampus Evolutionary Medicine of the Lung, BMBF program Medical Infection Genomics, Kiel Life Science Postdoc Award.}, } @article {pmid36952017, year = {2023}, author = {Boden, SA and McIntosh, RA and Uauy, C and Krattinger, SG and Dubcovsky, J and Rogers, WJ and Xia, XC and Badaeva, ED and Bentley, AR and Brown-Guedira, G and Caccamo, M and Cattivelli, L and Chhuneja, P and Cockram, J and Contreras-Moreira, B and Dreisigacker, S and Edwards, D and González, FG and Guzmán, C and Ikeda, TM and Karsai, I and Nasuda, S and Pozniak, C and Prins, R and Sen, TZ and Silva, P and Simkova, H and Zhang, Y and others}, title = {Updated guidelines for gene nomenclature in wheat.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {136}, number = {4}, pages = {72}, pmid = {36952017}, issn = {1432-2242}, support = {FT210100810//Australian Research Council/ ; DP210103744//Australian Research Council/ ; DP210100296//Australian Research Council/ ; DP200100762//Australian Research Council/ ; BB/P016855/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/OS/NW/000016/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P010741/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; 2022-68013-36439//National Institute of Food and Agriculture/ ; 2022-68013-36439//National Institute of Food and Agriculture/ ; 03/A215//UNCPBA/ ; CerealMed//Partnership for Research and Innovation in the Mediterranean Area/ ; INTA-PD-E3-I060//Instituto Nacional de Investigacion Agropecuaria, Uruguay/ ; RYC-2017-21891//Ministerio de Ciencia e Innovación/ ; GINOP-2.3.2-15-2016-00029//Innovation and Technology Ministry/ ; Diversity//Genome Canada/ ; Domestication//Genome Canada/ ; Discovery//Genome Canada/ ; Delivery//Genome Canada/ ; CZ.02.1.01/0.0/0.0/16_019/0000827//European Regional Development Fund/ ; 
PCI2019-103526//Programa Estatal de l+D+i Orientada a los Retos de la Sociedad/ ; PICT 2019-03256//Agencia Nacional de Promoción de la Investigacion/ ; 2030-21000-024-00D//Agricultural Research Service/ ; INIA_L1_CS_39//Instituto Nacional de Investigación Agropecuaria/ ; INIA_L1_CS_35//Instituto Nacional de Investigación Agropecuaria/ ; PR_FSA_2009_1_1369//Agencia Nacional de Investigación e Innovación/ ; FSA_1_2013_1_12980//Agencia Nacional de Investigación e Innovación/ ; FSA_1_2018_1_152918//Agencia Nacional de Investigación e Innovación/ ; }, mesh = {*Triticum/genetics ; *Plant Breeding ; Phenotype ; Genes, Plant ; Edible Grain/genetics ; }, abstract = {Here, we provide an updated set of guidelines for naming genes in wheat that has been endorsed by the wheat research community. The last decade has seen a proliferation in genomic resources for wheat, including reference- and pan-genome assemblies with gene annotations, which provide new opportunities to detect, characterise, and describe genes that influence traits of interest. The expansion of genetic information has supported growth of the wheat research community and catalysed strong interest in the genes that control agronomically important traits, such as yield, pathogen resistance, grain quality, and abiotic stress tolerance. To accommodate these developments, we present an updated set of guidelines for gene nomenclature in wheat. These guidelines can be used to describe loci identified based on morphological or phenotypic features or to name genes based on sequence information, such as similarity to genes characterised in other species or the biochemical properties of the encoded protein. The updated guidelines provide a flexible system that is not overly prescriptive but provides structure and a common framework for naming genes in wheat, which may be extended to related cereal species. 
We propose these guidelines be used henceforth by the wheat research community to facilitate integration of data from independent studies and allow broader and more efficient use of text and data mining approaches, which will ultimately help further accelerate wheat research and breeding.}, } @article {pmid36946261, year = {2023}, author = {Liang, Q and Muñoz-Amatriaín, M and Shu, S and Lo, S and Wu, X and Carlson, JW and Davidson, P and Goodstein, DM and Phillips, J and Janis, NM and Lee, EJ and Liang, C and Morrell, PL and Farmer, AD and Xu, P and Close, TJ and Lonardi, S}, title = {A view of the pan-genome of domesticated Cowpea (Vigna unguiculata [L.] Walp.).}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20319}, doi = {10.1002/tpg2.20319}, pmid = {36946261}, issn = {1940-3372}, support = {Cooperative Agreement AID-OAA-A-13-00070//United States Agency for International Development/ ; 32172568//National Natural Science Foundation of China/ ; Hatch Project CA-R-BPS-5306-H//U.S. Department of Agriculture/ ; IIS 1814359//National Science Foundation/ ; IOS 1543963//National Science Foundation/ ; Non-Assistance Cooperative Agreement 58-5030-7-069//Agricultural Research Service/ ; //National Ten-Thousand Talents Program of China/ ; Contract No. DE-AC02-05CH11231//U.S. Department of Energy/ ; 2021C02065-6-3//Major Science and Technology Project of Plant Breeding in Zhejiang Province/ ; }, abstract = {Cowpea, Vigna unguiculata L. Walp., is a diploid warm-season legume of critical importance as both food and fodder in sub-Saharan Africa. This species is also grown in Northern Africa, Europe, Latin America, North America, and East to Southeast Asia. To capture the genomic diversity of domesticates of this important legume, de novo genome assemblies were produced for representatives of six subpopulations of cultivated cowpea identified previously from genotyping of several hundred diverse accessions. 
In the most complete assembly (IT97K-499-35), 26,026 core and 4963 noncore genes were identified, with 35,436 pan genes when considering all seven accessions. GO terms associated with response to stress and defense response were highly enriched among the noncore genes, while core genes were enriched in terms related to transcription factor activity, and transport and metabolic processes. Over 5 million single nucleotide polymorphisms (SNPs) relative to each assembly and over 40 structural variants >1 Mb in size were identified by comparing genomes. Vu10 was the chromosome with the highest frequency of SNPs, and Vu04 had the most structural variants. Noncore genes harbor a larger proportion of potentially disruptive variants than core genes, including missense, stop gain, and frameshift mutations; this suggests that noncore genes substantially contribute to diversity within domesticated cowpea.}, } @article {pmid36944612, year = {2023}, author = {Zhou, Y and Yu, Z and Chebotarov, D and Chougule, K and Lu, Z and Rivera, LF and Kathiresan, N and Al-Bader, N and Mohammed, N and Alsantely, A and Mussurova, S and Santos, J and Thimma, M and Troukhan, M and Fornasiero, A and Green, CD and Copetti, D and Kudrna, D and Llaca, V and Lorieux, M and Zuccolo, A and Ware, D and McNally, K and Zhang, J and Wing, RA}, title = {Pan-genome inversion index reveals evolutionary insights into the subpopulation structure of Asian rice.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {1567}, pmid = {36944612}, issn = {2041-1723}, support = {S10 OD028621/OD/NIH HHS/United States ; }, mesh = {*Oryza/genetics ; Sequence Analysis, DNA ; Genome, Plant/genetics ; Biological Evolution ; Phylogeny ; }, abstract = {Understanding and exploiting genetic diversity is a key factor for the productive and stable production of rice. 
Here, we utilize 73 high-quality genomes that encompass the subpopulation structure of Asian rice (Oryza sativa), plus the genomes of two wild relatives (O. rufipogon and O. punctata), to build a pan-genome inversion index of 1769 non-redundant inversions that span an average of ~29% of the O. sativa cv. Nipponbare reference genome sequence. Using this index, we estimate an inversion rate of ~700 inversions per million years in Asian rice, which is 16 to 50 times higher than previously estimated for plants. Detailed analyses of these inversions show evidence of their effects on gene expression, recombination rate, and linkage disequilibrium. Our study uncovers the prevalence and scale of large inversions (≥100 bp) across the pan-genome of Asian rice and hints at their largely unexplored role in functional biology and crop performance.}, } @article {pmid36944262, year = {2023}, author = {Milner, DS and Galindo, LJ and Irwin, NAT and Richards, TA}, title = {Transporter Proteins as Ecological Assets and Features of Microbial Eukaryotic Pangenomes.}, journal = {Annual review of microbiology}, volume = {77}, number = {}, pages = {45-66}, doi = {10.1146/annurev-micro-032421-115538}, pmid = {36944262}, issn = {1545-3251}, mesh = {*Eukaryota/genetics ; *Eukaryotic Cells ; Membrane Transport Proteins ; Gene Duplication ; Phenotype ; }, abstract = {Here we review two connected themes in evolutionary microbiology: (a) the nature of gene repertoire variation within species groups (pangenomes) and (b) the concept of metabolite transporters as accessory proteins capable of providing niche-defining "bolt-on" phenotypes. We discuss the need for improved sampling and understanding of pangenome variation in eukaryotic microbes. We then review the factors that shape the repertoire of accessory genes within pangenomes. As part of this discussion, we outline how gene duplication is a key factor in both eukaryotic pangenome variation and transporter gene family evolution. 
We go on to outline how, through functional characterization of transporter-encoding genes, in combination with analyses of how transporter genes are gained and lost from accessory genomes, we can reveal much about the niche range, the ecology, and the evolution of virulence of microbes. We advocate for the coordinated systematic study of eukaryotic pangenomes through genome sequencing and the functional analysis of genes found within the accessory gene repertoire.}, } @article {pmid36943133, year = {2023}, author = {Kronen, M and Vázquez-Campos, X and Wilkins, MR and Lee, M and Manefield, MJ}, title = {Evidence for a Putative Isoprene Reductase in Acetobacterium wieringae.}, journal = {mSystems}, volume = {8}, number = {2}, pages = {e0011923}, pmid = {36943133}, issn = {2379-5077}, mesh = {*Oxidoreductases/genetics ; *Acetobacterium/genetics ; Butadienes/metabolism ; }, abstract = {Recent discoveries of isoprene-metabolizing microorganisms suggest they might play an important role in the global isoprene budget. Under anoxic conditions, isoprene can be used as an electron acceptor and is reduced to methylbutene. This study describes the proteogenomic profiling of an isoprene-reducing bacterial culture to identify organisms and genes responsible for the isoprene hydrogenation reaction. A metagenome-assembled genome (MAG) of the most abundant (89% relative abundance) lineage in the enrichment, Acetobacterium wieringae, was obtained. Comparative proteogenomics and reverse transcription-PCR (RT-PCR) identified a putative five-gene operon from the A. wieringae MAG upregulated during isoprene reduction. The operon encodes a putative oxidoreductase, three pleiotropic nickel chaperones (2 × HypA, HypB), and one 4Fe-4S ferredoxin. The oxidoreductase is proposed as the putative isoprene reductase with a binding site for NADH, flavin adenine dinucleotide (FAD), two pairs of canonical [4Fe-4S] clusters, and a putative iron-sulfur cluster site in a Cys6-bonding environment. 
Well-studied Acetobacterium strains, such as A. woodii DSM 1030, A. wieringae DSM 1911, or A. malicum DSM 4132, do not encode the isoprene-regulated operon but encode, like many other bacteria, a homolog of the putative isoprene reductase (~47 to 49% amino acid sequence identity). Uncharacterized homologs of the putative isoprene reductase are observed across the Firmicutes, Spirochaetes, Tenericutes, Actinobacteria, Chloroflexi, Bacteroidetes, and Proteobacteria, suggesting the ability of biohydrogenation of unfunctionalized conjugated doubled bonds in other unsaturated hydrocarbons. IMPORTANCE Isoprene was recently shown to act as an electron acceptor for a homoacetogenic bacterium. The focus of this study is the molecular basis for isoprene reduction. By comparing a genome from our isoprene-reducing enrichment culture, dominated by Acetobacterium wieringae, with genomes of other Acetobacterium lineages that do not reduce isoprene, we shortlisted candidate genes for isoprene reduction. Using comparative proteogenomics and reverse transcription-PCR we have identified a putative five-gene operon encoding an oxidoreductase referred to as putative isoprene reductase.}, } @article {pmid36941438, year = {2023}, author = {Zheng, X and Xu, S and Wang, Z and Tao, X and Liu, Y and Dai, L and Li, Y and Zhang, W}, title = {Sifting through the core-genome to identify putative cross-protective antigens against Riemerella anatipestifer.}, journal = {Applied microbiology and biotechnology}, volume = {107}, number = {9}, pages = {3085-3098}, pmid = {36941438}, issn = {1432-0614}, mesh = {Animals ; *Poultry Diseases/prevention & control ; Reproducibility of Results ; *Riemerella/genetics ; Vaccines, Subunit ; Ducks ; *Flavobacteriaceae Infections/prevention & control/veterinary ; }, abstract = {Infectious serositis of ducks, caused by Riemerella anatipestifer, is one of the main infectious diseases that harm commercial ducks. 
Whole-strain-based vaccines with no or few cross-protection were observed between different serotypes of R. anatipestifer, and so far, control of infection is hampered by a lack of effective vaccines, especially subunit vaccines with cross-protection. Since the concept of reverse vaccinology was introduced, it has been widely used to screen for protective antigens in important pathogens. In this study, pan-genome binding reverse vaccinology, an emerging approach to vaccine candidate screening, was used to screen for cross-protective antigens against R. anatipestifer. Thirty proteins were identified from the core-genome as potential cross-protective antigens. Three of these proteins were recombinantly expressed, and their immunoreactivity with five antisera (anti-serotypes 1, 2, 6, 10, and 11) was demonstrated by Western blotting. Our study established a method for high-throughput screening of cross-protective antigens against R. anatipestifer in silico, which will lay the foundation for the development of a cross-protective subunit vaccine controlling R. anatipestifer infection. KEY POINTS: • Pan-genome binding reverse vaccine approach was first established in R. anatipestifer to screen for subunit vaccine candidates. • Thirty potential cross-protective antigens against R. anatipestifer were identified by this method. 
• The reliability of the method was verified preliminarily by the results of Western blotting of three of these potential antigens.}, } @article {pmid36938359, year = {2023}, author = {Nguyen, HN and Sharp, GM and Stahl-Rommel, S and Velez Justiniano, YA and Castro, CL and Nelman-Gonzalez, M and O'Rourke, A and Lee, MD and Williamson, J and McCool, C and Crucian, B and Clark, KW and Jain, M and Castro-Wallace, SL}, title = {Microbial isolation and characterization from two flex lines from the urine processor assembly onboard the international space station.}, journal = {Biofilm}, volume = {5}, number = {}, pages = {100108}, pmid = {36938359}, issn = {2590-2075}, abstract = {Urine, humidity condensate, and other sources of non-potable water are processed onboard the International Space Station (ISS) by the Water Recovery System (WRS) yielding potable water. While some means of microbial control are in place, including a phosphoric acid/hexavalent chromium urine pretreatment solution, many areas within the WRS are not available for routine microbial monitoring. Due to refurbishment needs, two flex lines from the Urine Processor Assembly (UPA) within the WRS were removed and returned to Earth. The water from within these lines, as well as flush water, was microbially evaluated. Culture and culture-independent analysis revealed the presence of Burkholderia, Paraburkholderia, and Leifsonia. Fungal culture also identified Fusarium and Lecythophora. Hybrid de novo genome analysis of the five distinct Burkholderia isolates identified them as B. contaminans, while the two Paraburkholderia isolates were identified as P. fungorum. Chromate-resistance gene clusters were identified through pangenomic analysis that differentiated these genomes from previously studied isolates recovered from the point-of-use potable water dispenser and/or current NCBI references, indicating that unique populations exist within distinct niches in the WRS. 
Beyond genomic analysis, fixed samples directly from the lines were imaged by environmental scanning electron microscopy, which detailed networks of fungal-bacterial biofilms. This is the first evidence of biofilm formation within flex lines from the UPA onboard the ISS. For all bacteria isolated, biofilm potential was further characterized, with the B. contaminans isolates demonstrating the most considerable biofilm formation. Moreover, the genomes of the B. contaminans revealed secondary metabolite gene clusters associated with quorum sensing, biofilm formation, antifungal compounds, and hemolysins. The potential production of these gene cluster metabolites was phenotypically evaluated through biofilm, bacterial-fungal interaction, and hemolytic assays. Collectively, these data identify the UPA flex lines as a unique ecological niche and novel area of biofilm growth within the WRS. Further investigation of these organisms and their resistance profiles will enable engineering controls directed toward biofilm prevention in future space station water systems.}, } @article {pmid36936699, year = {2023}, author = {Ali Alghamdi, B and Al-Johani, I and Al-Shamrani, JM and Musamed Alshamrani, H and Al-Otaibi, BG and Almazmomi, K and Yusnoraini Yusof, N}, title = {Antimicrobial resistance in methicillin-resistant staphylococcus aureus.}, journal = {Saudi journal of biological sciences}, volume = {30}, number = {4}, pages = {103604}, pmid = {36936699}, issn = {1319-562X}, abstract = {In the medical community, antibiotics are revered as a miracle because they stop diseases brought on by pathogenic bacteria. Antibiotics have become the cornerstone of contemporary medical advancements ever since penicillin was discovered. Antibiotic resistance developed among germs quickly, placing a strain in the medical field. Methicillin-resistant Staphylococcus aureus (MRSA), Since 1961, has emerged as the major general antimicrobial resistant bacteria (AMR) worldwide. 
MRSA can easily transmit across the hospital system and has mostly gained resistance to medications called beta-lactamases. This enzyme destroys the cell wall of beta-lactam antibiotics resulting in resistance against that respective antibiotic. Daptomycin, linezolid and vancomycin were previously used to treat MRSA infections. However, due to mutations and Single nucleotide polymorphisms (SNPs) in Open reading frames (ORFs) and SCCmec machinery of respective antibody, MRSA developed resistance against those antibiotics. The MRSA strains (USA300, CC398, CC130 etc.), when their pan-genomes were analyzed were found the genes involved in invoking resistance against the antibiotics as well as the epidemiology of that respective strain. PENC (penicillin plus potassium clavulanate) is the new antibiotic showing potential in treatment of MRSA though it is itself resistant against penicillin alone. In this review, our main focus is on mechanism of development of AMR in MRSA, how different ORFs are involved in evoking resistance in MRSA and what is the core-genome of different antimicrobial resistant MRSA.}, } @article {pmid36935100, year = {2023}, author = {Khan, K and Jalal, K and Uddin, R}, title = {Pangenome profiling of novel drug target against vancomycin-resistant Enterococcus faecium.}, journal = {Journal of biomolecular structure \& dynamics}, volume = {}, number = {}, pages = {1-14}, doi = {10.1080/07391102.2023.2191134}, pmid = {36935100}, issn = {1538-0254}, abstract = {Enterococcus faecium is a frequent causative agent of nosocomial infection mainly acquired from outgoing hospital patients (Hospital Acquired Infection-HAIs). They are largely involved in the outbreaks of bacteremia, UTI, and endocarditis with a high transmissibility rate. The recent emergence of VRE strain (i.e. vancomycin resistant enterococcus) turned it into high priority pathogen for which new drug research is of dire need. 
Therefore, in current study, pangenome and resistome analyses were performed for available antibiotic-resistant genomes (n = 216) of E. faecium. It resulted in the prediction of around 5,059 genes as an accessory gene, 1,076 genes as core and 1,558 genes made up a unique genome fraction. Core genes common to all strains were further used for the identification of potent drug targets by applying subtractive genomics approach. Moreover, the COG functional analysis showed that these genomes are highly enriched in metabolic pathways such as in translational, ribosomal, proteins, carbohydrates and nucleotide transport metabolism. Through subtractive genomics it was observed that 431 proteins were non-homologous to the human proteome, 166 identified as essential for pathogen survival while 26 as potential and unique therapeutic targets. Finally, 3-dehydroquinate dehydrogenase was proposed as a potent drug target for further therapeutic candidate identification. Moreover, the molecular docking and dynamic simulation technique were applied to perform a virtual screening of natural product libraries (i.e., TCM and Ayurvedic compounds) along with 3-amino-4,5-dihydroxy-cyclohex-1-enecarboxylate (DHS) as a standard compound to validate the study. Consequently, Argeloside I, Apigenin-7-O-gentiobioside (from Ayurvedic library), ZINC85571062, and ZINC85570908 (TCM library) compounds were identified as potential inhibitors of 3-dehydroquinate dehydrogenase. The study proposed new compounds as novel therapeutics, however, further experimental validation is needed as a follow-up. Communicated by Ramaswamy H. 
Sarma.}, } @article {pmid36928221, year = {2023}, author = {Juscamayta-López, E and Valdivia, F and Soto, MP and Nureña, B and Horna, H}, title = {A pangenome approach-based loop-mediated isothermal amplification assay for the specific and early detection of Bordetella pertussis.}, journal = {Scientific reports}, volume = {13}, number = {1}, pages = {4356}, pmid = {36928221}, issn = {2045-2322}, support = {D43 TW007393/TW/FIC NIH HHS/United States ; }, mesh = {Humans ; *Bordetella pertussis/genetics ; *Whooping Cough/diagnosis ; Nucleic Acid Amplification Techniques ; Molecular Diagnostic Techniques ; Sensitivity and Specificity ; }, abstract = {Despite widespread vaccination, Bordetella pertussis continues to cause pertussis infections worldwide, leaving infants at the highest risk of severe illness and death, while people around them are likely the main sources of infection and rapidly spread the disease. Rapid and less complex molecular testing for the specific and timely diagnosis of pertussis remains a challenge that could help to prevent the disease from worsening and prevent its transmission. We aimed to develop and validate a colorimetric loop-mediated isothermal amplification (LAMP) assay using a new target uvrD_2 informed by the pangenome for the specific and early detection of B. pertussis. Compared to that of multitarget quantitative polymerase chain reaction (multitarget qPCR) using a large clinical DNA specimen (n = 600), the diagnostic sensitivity and specificity of the uvrD_2 LAMP assay were 100.0% and 98.6%, respectively, with a 99.7% degree of agreement between the two assays. The novel colorimetric uvrD_2 LAMP assay is highly sensitive and specific for detecting B. 
pertussis DNA in nasopharyngeal swabs and showed similar diagnostic accuracy to complex and high-cost multitarget qPCR, but it is faster, simpler, and inexpensive, which makes it very helpful for the reliable and timely diagnosis of pertussis in primary health care and resource-limited settings.}, } @article {pmid36925467, year = {2023}, author = {Deng, Y and Jiang, ZM and Han, XF and Su, J and Yu, LY and Liu, WH and Zhang, YQ}, title = {Pangenome analysis of the genus Herbiconiux and proposal of four new species associated with Chinese medicinal plants.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1119226}, pmid = {36925467}, issn = {1664-302X}, abstract = {Five Gram-stain-positive, aerobic, non-motile actinobacterial strains designated as CPCC 205763[T], CPCC 203386[T], CPCC 205716[T], CPCC 203406[T], and CPCC 203407 were obtained from different ecosystems associated with four kinds of Chinese traditional medicinal plants. The 16S rRNA gene sequences of these five strains showed closely related to members of the genus Herbiconiux of the family Microbacteriaceae, with the highest similarities of 97.4-99.7% to the four validly named species of Herbiconiux. In the phylogenetic trees based on 16S rRNA gene sequences and the core genome, these isolates clustered into the clade of the genus Herbiconiux within the lineage of the family Microbacteriaceae. The overall genome relatedness indexes (values of ANI and dDDH) and the phenotypic properties (morphological, physiological and chemotaxonomic characteristics) of these isolates, readily supported to affiliate them to the genus Herbiconiux, representing four novel species, with the isolates CPCC 203406[T] and CPCC 203407 being classified in the same species. For which the names Herbiconiux aconitum sp. nov. (type strain CPCC 205763[T] = I19A-01430[T] = CGMCC 1.60067[T]), Herbiconiux daphne sp. nov. 
(type strain CPCC 203386[T] = I10A-01569[T] = DSM 24546[T] = KCTC 19839[T]), Herbiconiux gentiana sp. nov. (type strain CPCC 205716[T] = I21A-01427[T] = CGMCC 1.60064[T]), and Herbiconiux oxytropis sp. nov. (type strain CPCC 203406[T] = I10A-02268[T] = DSM 24549[T] = KCTC 19840[T]) were proposed, respectively. In the genomes of these five strains, the putative encoding genes for amidase, endoglucanase, phosphatase, and superoxidative dismutase were retrieved, which were classified as biosynthetic genes/gene-clusters regarding plant growth-promotion (PGP) functions. The positive results from IAA-producing, cellulose-degrading and anti-oxidation experiments further approved their potential PGP bio-functions. Pangenome analysis of the genus Herbiconiux supported the polyphasic taxonomy results and confirmed their bio-function potential.}, } @article {pmid36919598, year = {2023}, author = {Lee, RRQ and Cher, WY and Wang, J and Chen, Y and Chae, E}, title = {Generating minimum set of gRNA to cover multiple targets in multiple genomes with MINORg.}, journal = {Nucleic acids research}, volume = {51}, number = {8}, pages = {e43}, pmid = {36919598}, issn = {1362-4962}, mesh = {Humans ; CRISPR-Cas Systems ; Gene Knockout Techniques ; Polymerase Chain Reaction ; *Software ; *RNA, Guide, CRISPR-Cas Systems ; }, abstract = {MINORg is an offline gRNA design tool that generates the smallest possible combination of gRNA capable of covering all desired targets in multiple non-reference genomes. As interest in pangenomic research grows, so does the workload required for large screens in multiple individuals. MINORg aims to lessen this workload by capitalising on sequence homology to favour multi-target gRNA while simultaneously screening multiple genetic backgrounds in order to generate reusable gRNA panels. 
We demonstrated the practical application of MINORg by knocking out 11 homologous genes tandemly arrayed in a multi-gene cluster in two Arabidopsis thaliana lineages using three gRNA output by MINORg. We also described a new PCR-free modular cloning system for multiplexing gRNA, and used it to knockout three tandemly arrayed genes in another multi-gene cluster with gRNA designed by MINORg. Source code is freely available at https://github.com/rlrq/MINORg.}, } @article {pmid36919166, year = {2023}, author = {Viana, MVC and Galdino, JH and Profeta, R and Oliveira, M and Tavares, L and de Castro Soares, S and Carneiro, P and Wattam, AR and Azevedo, V}, title = {Analysis of Corynebacterium silvaticum genomes from Portugal reveals a single cluster and a clade suggested to produce diphtheria toxin.}, journal = {PeerJ}, volume = {11}, number = {}, pages = {e14895}, pmid = {36919166}, issn = {2167-8359}, mesh = {Swine ; Animals ; *Diphtheria Toxin/genetics ; Portugal/epidemiology ; Phylogeny ; *Deer/metabolism ; Corynebacterium ; Sus scrofa/metabolism ; Zoonoses ; }, abstract = {BACKGROUND: Corynebacterium silvaticum is a pathogenic, gram-positive bacterial species that causes caseous lymphadenitis in wild boars, domestic pigs and roe deer in Western Europe. It can affect animal production and cause zoonosis. Genome analysis has suggested that one strain from Portugal and one from Austria could probably produce the diphtheria toxin (DT), which inhibits protein synthesis and can cause death.

METHODS: To further investigate the species genetic diversity and probable production of DT by Portuguese strains, eight isolates from this country were sequenced and compared to 38 public ones.

RESULTS: Strains from Portugal are monophyletic, nearly identical, form a unique cluster and have 27 out of 36 known Corynebacterium virulence or niche factors. All of them lack a frameshift in the tox gene and were suggested to produce DT. A phylogenetic analysis shows that the species has diverged into two clades. Clade 1 is composed of strains that were suggested to have the ability to produce DT, represented by the monophyletic strains from Portugal and strain 05-13 from Austria. Clade 2 is composed of strains unable to produce DT due to a frameshifted tox gene. The second clade is represented by strains from Austria, Germany and Switzerland. Ten genome clusters were detected, in which strains from Germany are the most diverse. Strains from Portugal belong to an exclusive cluster. The pangenome has 2,961 proteins and is nearly closed (α = 0.968). Exclusive genes shared by clusters 1 and 2, and Portuguese strains are probably not related to disease manifestation as they share the same host but could play a role in their extra-host environmental adaptation. These results show the potential of the species to cause zoonosis, possibly diphtheria. 
The identified clusters, exclusively shared genes, and exclusive STs identified in Portugal could be applied in the identification and epidemiology of the species.}, } @article {pmid36916949, year = {2023}, author = {Svahn, AJ and Suster, CJE and Chang, SL and Rockett, RJ and Sim, EM and Cliff, OM and Wang, Q and Arnott, A and Ramsperger, M and Sorrell, TC and Sintchenko, V and Prokopenko, M}, title = {Pangenome Analysis of a Salmonella Enteritidis Population Links a Major Outbreak to a Gifsy-1-Like Prophage Containing Anti-Inflammatory Gene gogB.}, journal = {Microbiology spectrum}, volume = {11}, number = {2}, pages = {e0279122}, pmid = {36916949}, issn = {2165-0497}, abstract = {A major outbreak of the globally significant Salmonella Enteritidis foodborne pathogen was identified within a large clinical data set by a program of routine WGS of clinical presentations of salmonellosis in New South Wales, Australia. Pangenome analysis helped to quantify and isolate prophage content within the accessory partition of the pangenome. A prophage similar to Gifsy-1 (henceforth GF-1L) was found to occur in all isolates of the outbreak core SNP cluster, and in three other isolates. Further analysis revealed that the GF-1L prophage carried the gogB virulence factor. These observations suggest that GF-1L may be an important marker of virulence for S. Enteritidis population screening and, that anti-inflammatory, gogB-mediated virulence currently associated with Salmonella Typhimurium may also be displayed by S. Enteritidis. IMPORTANCE We examined 5 years of genomic and epidemiological data for the significant global foodborne pathogen, Salmonella enterica. Although Salmonella enterica subspecies enterica serovar Enteritidis (S. Enteritidis) is the leading cause of salmonellosis in the USA and Europe, prior to 2018 it was not endemic in the southern states of Australia. However, in 2018 a large outbreak led to the endemicity of S. 
Enteritidis in New South Wales, Australia, and a unique opportunity to study this phenomenon. Using pangenome analysis we uncovered that this clone contained a Gifsy-1-like prophage harboring the known virulence factor gogB. The prophage reported has not previously been described in S. Enteritidis isolates.}, } @article {pmid36914349, year = {2023}, author = {Wang, D and Fletcher, GC and Gagic, D and On, SLW and Palmer, JS and Flint, SH}, title = {Comparative genome identification of accessory genes associated with strong biofilm formation in Vibrio parahaemolyticus.}, journal = {Food research international (Ottawa, Ont.)}, volume = {166}, number = {}, pages = {112605}, doi = {10.1016/j.foodres.2023.112605}, pmid = {36914349}, issn = {1873-7145}, mesh = {*Vibrio parahaemolyticus/genetics ; Biofilms ; Genomics ; Operon ; Cellulose ; }, abstract = {Vibrio parahaemolyticus biofilms on the seafood processing plant surfaces are a potential source of seafood contamination and subsequent food poisoning. Strains differ in their ability to form biofilm, but little is known about the genetic characteristics responsible for biofilm development. In this study, pangenome and comparative genome analysis of V. parahaemolyticus strains reveals genetic attributes and gene repertoire that contribute to robust biofilm formation. The study identified 136 accessory genes that were exclusively present in strong biofilm forming strains and these were functionally assigned to the Gene Ontology (GO) pathways of cellulose biosynthesis, rhamnose metabolic and catabolic processes, UDP-glucose processes and O antigen biosynthesis (p < 0.05). Strategies of CRISPR-Cas defence and MSHA pilus-led attachment were implicated via Kyoto Encyclopedia of Genes and Genomes (KEGG) annotation. Higher levels of horizontal gene transfer (HGT) were inferred to confer more putatively novel properties on biofilm-forming V. parahaemolyticus. 
Furthermore, cellulose biosynthesis, a neglected potential virulence factor, was identified as being acquired from within the order Vibrionales. The cellulose synthase operons in V. parahaemolyticus were examined for their prevalence (22/138, 15.94 %) and were found to consist of the genes bcsG, bcsE, bcsQ, bcsA, bcsB, bcsZ, bcsC. This study provides insights into robust biofilm formation of V. parahaemolyticus at the genomic level and facilitates: identification of key attributes for robust biofilm formation, elucidation of biofilm formation mechanisms and development of potential targets for novel control strategies of persistent V. parahaemolyticus.}, } @article {pmid36912660, year = {2023}, author = {Ranković, T and Nikolić, I and Berić, T and Popović, T and Lozo, J and Medić, O and Stanković, S}, title = {Genome Analysis of Two Pseudomonas syringae pv. aptata Strains with Different Virulence Capacity Isolated from Sugar Beet: Features of Successful Pathogenicity in the Phyllosphere Microbiome.}, journal = {Microbiology spectrum}, volume = {11}, number = {2}, pages = {e0359822}, pmid = {36912660}, issn = {2165-0497}, abstract = {Members of the Pseudomonas syringae species complex are heterogeneous bacteria that are the most abundant bacterial plant pathogens in the plant phyllosphere, with strong abilities to exist on and infect different plant hosts and survive in/outside agroecosystems. In this study, the draft genome sequences of two pathogenic P. syringae pv. aptata strains with different in planta virulence capacities isolated from the phyllosphere of infected sugar beet were analyzed to evaluate putative features of survival strategies and to determine the pathogenic potential of the strains. The draft genomes of P. syringae pv. aptata strains P16 and P21 are 5,974,057 bp and 6,353,752 bp in size, have GC contents of 59.03% and 58.77%, respectively, and contain 3,439 and 3,536 protein-coding sequences, respectively. 
For both average nucleotide identity and pangenome analysis, P16 and P21 largely clustered with other pv. aptata strains from the same isolation source. We found differences in the repertoire of effectors of the type III secretion system among all 102 selected strains, suggesting that the type III secretion system is a critical factor in the different virulent phenotypes of P. syringae pv. aptata. During genome analysis of the highly virulent strain P21, we discovered genes for T3SS effectors (AvrRpm1, HopAW1, and HopAU1) that were not previously found in genomes of P. syringae pv. aptata. We also identified coding sequences for pantothenate kinase, VapC endonuclease, phospholipase, and pectate lyase in both genomes, which may represent novel effectors of the type III secretion system. IMPORTANCE Genome analysis has an enormous effect on understanding the life strategies of plant pathogens. Comparing similarities with pathogens involved in other epidemics could elucidate the pathogen life cycle when a new outbreak happens. This study represents the first in-depth genome analysis of Pseudomonas syringae pv. aptata, the causative agent of leaf spot disease of sugar beet. Despite the increasing number of disease reports in recent years worldwide, there is still a lack of information about the genomic features, epidemiology, and pathogenic life strategies of this particular pathogen. Our findings provide advances in disease etiology (especially T3SS effector repertoire) and elucidate the role of environmental adaptations required for prevalence in the pathobiome of the sugar beet. From the perspective of the very heterogeneous P. 
syringae species complex, this type of analysis has specific importance in reporting the characteristics of individual strains.}, } @article {pmid36910224, year = {2023}, author = {Coskun, ÖK and Gomez-Saez, GV and Beren, M and Ozcan, D and Hosgormez, H and Einsiedl, F and Orsi, WD}, title = {Carbon metabolism and biogeography of candidate phylum "Candidatus Bipolaricaulota" in geothermal environments of Biga Peninsula, Turkey.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1063139}, pmid = {36910224}, issn = {1664-302X}, abstract = {Terrestrial hydrothermal springs and aquifers are excellent sites to study microbial biogeography because of their high physicochemical heterogeneity across relatively limited geographic regions. In this study, we performed 16S rRNA gene sequencing and metagenomic analyses of the microbial diversity of 11 different geothermal aquifers and springs across the tectonically active Biga Peninsula (Turkey). Across geothermal settings ranging in temperature from 43 to 79°C, one of the most highly represented groups in both 16S rRNA gene and metagenomic datasets was affiliated with the uncultivated phylum "Candidatus Bipolaricaulota" (former "Ca. Acetothermia" and OP1 division). The highest relative abundance of "Ca. Bipolaricaulota" was observed in a 68°C geothermal brine sediment, where it dominated the microbial community, representing 91% of all detectable 16S rRNA genes. Correlation analysis of "Ca. Bipolaricaulota" operational taxonomic units (OTUs) with physicochemical parameters indicated that salinity was the strongest environmental factor measured associated with the distribution of this novel group in geothermal fluids. Correspondingly, analysis of 23 metagenome-assembled genomes (MAGs) revealed two distinct groups of "Ca. 
Bipolaricaulota" MAGs based on the differences in carbon metabolism: one group encoding the bacterial Wood-Ljungdahl pathway (WLP) for H2 dependent CO2 fixation is selected for at lower salinities, and a second heterotrophic clade that lacks the WLP that was selected for under hypersaline conditions in the geothermal brine sediment. In conclusion, our results highlight that the biogeography of "Ca. Bipolaricaulota" taxa is strongly correlated with salinity in hydrothermal ecosystems, which coincides with key differences in carbon acquisition strategies. The exceptionally high relative abundance of apparently heterotrophic representatives of this novel candidate Phylum in geothermal brine sediment observed here may help to guide future enrichment experiments to obtain representatives in pure culture.}, } @article {pmid36909378, year = {2023}, author = {Gupta, P and Li, S}, title = {Editorial: Methods in genome, pan-genome, pan-transcriptome, and gene regulatory network (GRN) construction and analysis.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1152708}, pmid = {36909378}, issn = {1664-462X}, } @article {pmid36906708, year = {2023}, author = {Tanabe, Y and Yamaguchi, H and Yoshida, M and Kai, A and Okazaki, Y}, title = {Characterization of a bloom-associated alphaproteobacterial lineage, 'Candidatus Phycosocius': insights into freshwater algal-bacterial interactions.}, journal = {ISME communications}, volume = {3}, number = {1}, pages = {20}, pmid = {36906708}, issn = {2730-6151}, abstract = {Marine bacterial lineages associated with algal blooms, such as the Roseobacter clade, have been well characterized in ecological and genomic contexts, yet such lineages have rarely been explored in freshwater blooms. 
This study performed phenotypic and genomic analyses of an alphaproteobacterial lineage 'Candidatus Phycosocius' (denoted the CaP clade), one of the few lineages ubiquitously associated with freshwater algal blooms, and described a novel species: 'Ca. Phycosocius spiralis.' Phylogenomic analyses indicated that the CaP clade is a deeply branching lineage in the Caulobacterales. Pangenome analyses revealed characteristic features of the CaP clade: aerobic anoxygenic photosynthesis and essential vitamin B auxotrophy. Genome size varies widely among members of the CaP clade (2.5-3.7 Mb), likely a result of independent genome reductions at each lineage. This includes a loss of tight adherence pilus genes (tad) in 'Ca. P. spiralis' that may reflect its adoption of a unique spiral cell shape and corkscrew-like burrowing activity at the algal surface. Notably, quorum sensing (QS) proteins showed incongruent phylogenies, suggesting that horizontal transfers of QS genes and QS-involved interactions with specific algal partners might drive CaP clade diversification. This study elucidates the ecophysiology and evolution of proteobacteria associated with freshwater algal blooms.}, } @article {pmid36901726, year = {2023}, author = {Sonnenberg, CB and Haugen, P}, title = {Bipartite Genomes in Enterobacterales: Independent Origins of Chromids, Elevated Openness and Donors of Horizontally Transferred Genes.}, journal = {International journal of molecular sciences}, volume = {24}, number = {5}, pages = {}, pmid = {36901726}, issn = {1422-0067}, mesh = {*Genome, Bacterial ; Plasmids ; Bacteria/genetics ; *Gammaproteobacteria ; Codon Usage ; Gene Transfer, Horizontal ; }, abstract = {Multipartite bacteria have one chromosome and one or more chromid. Chromids are believed to have properties that enhance genomic flexibility, making them a favored integration site for new genes. However, the mechanism by which chromosomes and chromids jointly contribute to this flexibility is not clear. 
To shed light on this, we analyzed the openness of chromosomes and chromids of the two bacteria, Vibrio and Pseudoalteromonas, both of which belong to the Enterobacterales order of Gammaproteobacteria, and compared the genomic openness with that of monopartite genomes in the same order. We applied pangenome analysis, codon usage analysis and the HGTector software to detect horizontally transferred genes. Our findings suggest that the chromids of Vibrio and Pseudoalteromonas originated from two separate plasmid acquisition events. Bipartite genomes were found to be more open compared to monopartite. We found that the shell and cloud pangene categories drive the openness of bipartite genomes in Vibrio and Pseudoalteromonas. Based on this and our two recent studies, we propose a hypothesis that explains how chromids and the chromosome terminus region contribute to the genomic plasticity of bipartite genomes.}, } @article {pmid36900455, year = {2023}, author = {López-García, E and Benítez-Cabello, A and Ramiro-García, J and Ladero, V and Arroyo-López, FN}, title = {In Silico Evidence of the Multifunctional Features of Lactiplantibacillus pentosus LPG1, a Natural Fermenting Agent Isolated from Table Olive Biofilms.}, journal = {Foods (Basel, Switzerland)}, volume = {12}, number = {5}, pages = {}, pmid = {36900455}, issn = {2304-8158}, support = {RTI2018-100883-B-I00, MCIU/AEI/FEDER, UE//Ministerio de Ciencia, Innovación y Universidades (Spain)/ ; }, abstract = {In recent years, there has been a growing interest in obtaining probiotic bacteria from plant origins. This is the case of Lactiplantibacillus pentosus LPG1, a lactic acid bacterial strain isolated from table olive biofilms with proven multifunctional features. In this work, we have sequenced and closed the complete genome of L. pentosus LPG1 using both Illumina and PacBio technologies. 
Our intention is to carry out a comprehensive bioinformatics analysis and whole-genome annotation for a further complete evaluation of the safety and functionality of this microorganism. The chromosomic genome had a size of 3,619,252 bp, with a GC (Guanine-Cytosine) content of 46.34%. L. pentosus LPG1 also had two plasmids, designated as pl1LPG1 and pl2LPG1, with lengths of 72,578 and 8713 bp (base pair), respectively. Genome annotation revealed that the sequenced genome consisted of 3345 coding genes and 89 non-coding sequences (73 tRNA and 16 rRNA genes). Taxonomy was confirmed by Average Nucleotide Identity analysis, which grouped L. pentosus LPG1 with other sequenced L. pentosus genomes. Moreover, the pan-genome analysis showed that L. pentosus LPG1 was closely related to the L. pentosus strains IG8, IG9, IG11, and IG12, all of which were isolated from table olive biofilms. Resistome analysis reported the absence of antibiotic resistance genes, whilst PathogenFinder tool classified the strain as a non-human pathogen. Finally, in silico analysis of L. pentosus LPG1 showed that many of its previously reported technological and probiotic phenotypes corresponded with the presence of functional genes. In light of these results, we can conclude that L. pentosus LPG1 is a safe microorganism and a potential human probiotic with a plant origin and application as a starter culture for vegetable fermentations.}, } @article {pmid36899131, year = {2023}, author = {Kim, E and Jung, HI and Park, SH and Kim, HY and Kim, SK}, title = {Comprehensive genome analysis of Burkholderia contaminans SK875, a quorum-sensing strain isolated from the swine.}, journal = {AMB Express}, volume = {13}, number = {1}, pages = {30}, pmid = {36899131}, issn = {2191-0855}, support = {2021//Konkuk University/ ; }, abstract = {The Burkholderia cepacia complex (BCC) is a Gram-negative bacterial, including Burkholderia contaminans species. 
Although the plain Burkholderia is pervasive from taxonomic and genetic perspectives, a common characteristic is that they may use the quorum-sensing (QS) system. In our previous study, we generated the complete genome sequence of Burkholderia contaminans SK875 isolated from the respiratory tract. To our knowledge, this is the first study to report functional genomic features of B. contaminans SK875 for understanding the pathogenic characteristics. In addition, comparative genomic analysis for five B. contaminans genomes was performed to provide comprehensive information on the disease potential of B. contaminans species. Analysis of average nucleotide identity (ANI) showed that the genome has high similarity (> 96%) with other B. contaminans strains. Five B. contaminans genomes yielded a pangenome of 8832 coding genes, a core genome of 5452 genes, the accessory genome of 2128 genes, and a unique genome of 1252 genes. The 186 genes were specific to B. contaminans SK875, including toxin higB-2, oxygen-dependent choline dehydrogenase, and hypothetical proteins. Genotypic analysis of the antimicrobial resistance of B. contaminans SK875 verified resistance to tetracycline, fluoroquinolone, and aminoglycoside. Compared with the virulence factor database, we identified 79 promising virulence genes such as adhesion system, invasions, antiphagocytic, and secretion systems. Moreover, 45 genes of 57 QS-related genes that were identified in B. contaminans SK875 indicated high sequence homology with other B. contaminans strains. Our results will help to gain insight into virulence, antibiotic resistance, and quorum sensing for B. 
contaminans species.}, } @article {pmid36898633, year = {2023}, author = {Salaheen, S and Kim, SW and Springer, HR and Hovingh, EP and Van Kessel, JAS and Haley, BJ}, title = {Genomic diversity of antimicrobial-resistant and Shiga toxin gene-harboring non-O157 Escherichia coli from dairy calves.}, journal = {Journal of global antimicrobial resistance}, volume = {33}, number = {}, pages = {164-170}, doi = {10.1016/j.jgar.2023.02.022}, pmid = {36898633}, issn = {2213-7173}, mesh = {Animals ; Cattle ; Humans ; Shiga Toxin ; *Escherichia coli Infections/veterinary ; Phylogeny ; *Shiga-Toxigenic Escherichia coli/genetics ; Genomics ; }, abstract = {OBJECTIVES: Shiga toxin-producing Escherichia coli (STEC) are globally significant foodborne pathogens. Dairy calves are a known reservoir of both O157 and non-O157 STEC. The objective of this study was to comprehensively evaluate the genomic attributes, diversity, virulence factors, and antimicrobial resistance gene (ARG) profiles of the STEC from preweaned and postweaned dairy calves in commercial dairy herds.

METHODS: In total, 31 non-O157 STEC were identified as part of a larger study focused on the pangenome of >1000 E. coli isolates from the faeces of preweaned and postweaned dairy calves on commercial dairy farms. These 31 genomes were sequenced on an Illumina NextSeq500 platform.

RESULTS: Based on the phylogenetic analyses, the STEC isolates were determined to be polyphyletic, with at least three phylogroups: A (32%), B1 (58%), and G (3%). These phylogroups represented at least 16 sequence types and 11 serogroups, including two of the 'big six' serogroups, O103 and O111. Several Shiga toxin gene subtypes were identified in the genomes, including stx1a, stx2a, stx2c, stx2d, and stx2g. Using the ResFinder database, the majority of the isolates (>50%) were determined to be multidrug-resistant strains because they harboured genes conferring resistance to three or more classes of antimicrobials, including some of human health significance (e.g., β-lactams, macrolides, and fosfomycin). Additionally, non-O157 STEC strain persistence and transmission within a farm was observed.

CONCLUSION: Dairy calves are a reservoir of phylogenomically diverse multidrug-resistant non-O157 STEC. Information from this study may inform assessments of public health risk and guide preharvest prevention strategies focusing on STEC reservoirs.}, } @article {pmid36897406, year = {2023}, author = {Xu, Y and Kong, X and Guo, Y and Wang, R and Yao, X and Chen, X and Yan, T and Wu, D and Lu, Y and Dong, J and Zhu, Y and Chen, M and Cen, H and Jiang, L}, title = {Structural variations and environmental specificities of flowering time-related genes in Brassica napus.}, journal = {TAG. Theoretical and applied genetics. Theoretische und angewandte Genetik}, volume = {136}, number = {3}, pages = {42}, pmid = {36897406}, issn = {1432-2242}, support = {No. 32130076//Natural Science Foundation of China/ ; 31961143008//Natural Science Foundation of China/ ; 2021C02057//Key Science and Technology Project of Zhejiang Province/ ; }, mesh = {*Brassica napus/genetics ; Quantitative Trait Loci ; Genome-Wide Association Study ; Plant Breeding ; Genotype ; *Arabidopsis/genetics ; }, abstract = {We found that the flowering time order of accessions in a genetic population considerably varied across environments, and homolog copies of essential flowering time genes played different roles in different locations. Flowering time plays a critical role in determining the life cycle length, yield, and quality of a crop. However, the allelic polymorphism of flowering time-related genes (FTRGs) in Brassica napus, an important oil crop, remains unclear. Here, we provide high-resolution graphics of FTRGs in B. napus on a pangenome-wide scale based on single nucleotide polymorphism (SNP) and structural variation (SV) analyses. A total of 1337 FTRGs in B. napus were identified by aligning their coding sequences with Arabidopsis orthologs. Overall, 46.07% of FTRGs were core genes and 53.93% were variable genes. 
Moreover, 1.94%, 0.74%, and 4.49% FTRGs had significant presence-frequency differences (PFDs) between the spring and semi-winter, spring and winter, and winter and semi-winter ecotypes, respectively. SNPs and SVs across 1626 accessions of 39 FTRGs underlying numerous published qualitative trait loci were analyzed. Additionally, to identify FTRGs specific to an eco-condition, genome-wide association studies (GWASs) based on SNP, presence/absence variation (PAV), and SV were performed after growing and observing the flowering time order (FTO) of plants in a collection of 292 accessions at three locations in two successive years. It was discovered that the FTO of plants in a genetic population changed a lot across various environments, and homolog copies of some key FTRGs played different roles in different locations. This study revealed the molecular basis of the genotype-by-environment (G × E) effect on flowering and recommended a pool of candidate genes specific to locations for breeding selection.}, } @article {pmid36892794, year = {2023}, author = {Xu, Y and Cheng, T and Rao, Q and Zhang, S and Ma, YL}, title = {Comparative genomic analysis of Stenotrophomonas maltophilia unravels their genetic variations and versatility trait.}, journal = {Journal of applied genetics}, volume = {64}, number = {2}, pages = {351-360}, pmid = {36892794}, issn = {2190-3883}, mesh = {*Stenotrophomonas maltophilia/genetics ; Phylogeny ; Phenotype ; Genomics ; Genetic Variation ; }, abstract = {Stenotrophomonas maltophilia is a species with immensely broad phenotypic and genotypic diversity that could widely distribute in natural and clinical environments. However, little attention has been paid to reveal their genome plasticity to diverse environments. In the present study, a comparative genomic analysis of S. maltophilia isolated from clinical and natural sources was systematically explored its genetic diversity of 42 sequenced genomes. The results showed that S. 
maltophilia owned an open pan-genome and had strong adaptability to different environments. A total of 1612 core genes existed, accounting for an average of 39.43% of each genome, and the shared core genes might be necessary to maintain the basic characteristics of those S. maltophilia strains. Based on the results of the phylogenetic tree, the ANI value, and the distribution of accessory genes, genes associated with the fundamental process of those strains from the same habitat were found to be mostly conserved in evolution. Isolates from the same habitat had a high degree of similarity in COG category, and the most significant KEGG pathways were mainly involved in carbohydrate and amino acid metabolism, indicating that genes related to essential processes were mostly conserved in evolution for the clinical and environmental settings. Meanwhile, the number of resistance and efflux pump genes was significantly higher in the clinical setting than in the environmental setting. Collectively, this study highlights the evolutionary relationships of S. maltophilia isolated from clinical and environmental sources, shedding new light on its genomic diversity.}, } @article {pmid36884376, year = {2023}, author = {Zhang, DD and Zhang, XJ and Wu, D and Li, BB and Liu, HC and Zhou, YG and Fang, BZ and Li, WJ and Cai, M}, title = {Aquiflexum gelatinilyticum sp. 
nov., isolated from river water.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {73}, number = {3}, pages = {}, doi = {10.1099/ijsem.0.005741}, pmid = {36884376}, issn = {1466-5034}, mesh = {*Fatty Acids/chemistry ; *Phospholipids/chemistry ; Rivers/microbiology ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Phylogeny ; Bacterial Typing Techniques ; DNA, Bacterial/genetics ; Base Composition ; Bacteroidetes ; Water/analysis ; }, abstract = {Two Gram-stain-negative, strictly aerobic, rod-shaped, non-motile and non-gliding bacteria, designated as XJ19-10[T] and XJ19-11, were isolated from river water in Xinjiang Uygur Autonomous Region, PR China. Cells of these strains were catalase-, oxidase- and gelatinase-positive and contained carotenoids but no flexirubins. Growth occurred at 10-30 °C, pH 7.0-9.0 and with 0-2.5% (w/v) NaCl. On the basis of the results of 16S rRNA gene sequence and genome analyses, the two isolates represented members of the genus Aquiflexum, and the closest relative was Aquiflexum aquatile Z0201[T] with 16S rRNA gene sequence pairwise similarities of 97.9-98.1%. Furthermore, the average nucleotide identities and digital DNA-DNA hybridization identities between the two isolates and other relatives were all less than 82.9 and 28.2 %, respectively, all below the species delineation thresholds. The results of pan-genomic analysis indicated that the type strain XJ19-10[T] shared 2813 core gene clusters with other three type strains of members of the genus Aquiflexum, as well as having 623 strain-specific clusters. The major polar lipids were phosphatidylethanolamine, phosphatidylcholine, an unidentified aminolipid and unidentified lipids. The predominant fatty acids (>10% of the total contents) were iso-C15 : 0, iso-C15 : 1G, iso-C17 : 0 3-OH and summed feature 9, and MK-7 was the respiratory quinone. 
On the basis of the results of phenotypic, physiological, chemotaxonomic and genotypic characterization, strains XJ19-10[T] and XJ19-11 are considered to represent a novel species, for which the name Aquiflexum gelatinilyticum sp. nov. is proposed. The type strain is XJ19-10[T] (=CGMCC 1.19385[T] =KCTC 92266[T]).}, } @article {pmid36882215, year = {2023}, author = {Lee, Y and Kim, JH and Yoon, JH and Lee, JS and Sukhoom, A and Kim, W}, title = {Description of Defluviimonas salinarum sp. nov. with the potential of benzene-degradation isolated from saltern in the Yellow Seacoast.}, journal = {FEMS microbiology letters}, volume = {370}, number = {}, pages = {}, doi = {10.1093/femsle/fnad018}, pmid = {36882215}, issn = {1574-6968}, mesh = {*Phospholipids/chemistry ; Seawater/microbiology ; Benzene ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; *Rhodobacteraceae ; Bacterial Typing Techniques ; DNA, Bacterial/genetics ; Fatty Acids/chemistry ; }, abstract = {Strain CAU 1641T was isolated from saltern collected in Ganghwa Island, Republic of Korea. The bacterium was an aerobic, Gram-negative, catalase-positive, oxidase-positive, motile, and rod-shaped bacterium. Cells of strain CAU 1641T could grow at 20-40°C and pH 6.0-9.0 with 1.0-3.0% (w/v) NaCl. Strain CAU 1641T shared high 16S rRNA gene sequence similarities with Defluviimonas aquaemixtae KCTC 42108T (98.0%), Defluviimonas denitrificans DSM 18921T (97.6%), and Defluviimonas aestuarii KACC 16442T (97.5%). Phylogenetic trees based on the 16S rRNA gene and the core-genome sequences indicated that strain CAU 1641T belonged to genus Defluviimonas. Strain CAU 1641T contained ubiquinone-10 (Q-10) as the sole respiratory quinone and summed feature 8 (C18:1ω6c and/or C18:1ω7c) as the predominant fatty acid (86.1%). The pan-genome analysis indicated that the genomes of the strain CAU 1641T and 15 reference strains contain a small core genome. 
The Average Nucleotide Identity and digital DNA-DNA hybridization values among strain CAU 1641T and reference strains of the genus Defluviimonas were in the range of 77.6%-78.8% and 21.1-22.1%, respectively. The genome of strain CAU 1641T has several genes of benzene degradation. The genomic G + C content was 66.6%. Based on polyphasic and genomic analyses, strain CAU 1641T represents a novel species of the genus Defluviimonas, for which the name Defluviimonas salinarum sp. nov., is proposed. The type strain is CAU 1641T (= KCTC 92081T = MCCC 1K07180T).}, } @article {pmid36876113, year = {2023}, author = {Anderson, BD and Bisanz, JE}, title = {Challenges and opportunities of strain diversity in gut microbiome research.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1117122}, pmid = {36876113}, issn = {1664-302X}, abstract = {Just because two things are related does not mean they are the same. In analyzing microbiome data, we are often limited to species-level analyses, and even with the ability to resolve strains, we lack comprehensive databases and understanding of the importance of strain-level variation outside of a limited number of model organisms. The bacterial genome is highly plastic with gene gain and loss occurring at rates comparable or higher than de novo mutations. As such, the conserved portion of the genome is often a fraction of the pangenome which gives rise to significant phenotypic variation, particularly in traits which are important in host microbe interactions. In this review, we discuss the mechanisms that give rise to strain variation and methods that can be used to study it. We identify that while strain diversity can act as a major barrier in interpreting and generalizing microbiome data, it can also be a powerful tool for mechanistic research. We then highlight recent examples demonstrating the importance of strain variation in colonization, virulence, and xenobiotic metabolism. 
Moving past taxonomy and the species concept will be crucial for future mechanistic research to understand microbiome structure and function.}, } @article {pmid36875624, year = {2023}, author = {Nawae, W and Naktang, C and Charoensri, S and U-Thoomporn, S and Narong, N and Chusri, O and Tangphatsornruang, S and Pootakham, W}, title = {Resequencing of durian genomes reveals large genetic variations among different cultivars.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1137077}, pmid = {36875624}, issn = {1664-462X}, abstract = {Durian (Durio zibethinus), which yields the fruit known as the "King of Fruits," is an important economic crop in Southeast Asia. Several durian cultivars have been developed in this region. In this study, we resequenced the genomes of three popular durian cultivars in Thailand, including Kradumthong (KD), Monthong (MT), and Puangmanee (PM) to investigate genetic diversities of cultivated durians. KD, MT, and PM genome assemblies were 832.7, 762.6, and 821.6 Mb, and their annotations covered 95.7, 92.4, and 92.7% of the embryophyta core proteins, respectively. We constructed the draft durian pangenome and analyzed comparative genomes with related species in Malvales. Long terminal repeat (LTR) sequences and protein families in durian genomes had slower evolution rates than that in cotton genomes. However, protein families with transcriptional regulation function and protein phosphorylation function involved in abiotic and biotic stress responses appeared to evolve faster in durians. The analyses of phylogenetic relationships, copy number variations (CNVs), and presence/absence variations (PAVs) suggested that the genome evolution of Thai durians was different from that of the Malaysian durian, Musang King (MK). 
Among the three newly sequenced genomes, the PAV and CNV profiles of disease resistance genes and the expressions of methylesterase inhibitor domain containing genes involved in flowering and fruit maturation in MT were different from those in KD and PM. These genome assemblies and their analyses provide valuable resources to gain a better understanding of the genetic diversity of cultivated durians, which may be useful for the future development of new durian cultivars.}, } @article {pmid36875612, year = {2023}, author = {Shirasawa, K and Moraga, R and Ghelfi, A and Hirakawa, H and Nagasaki, H and Ghamkhar, K and Barrett, BA and Griffiths, AG and Isobe, SN}, title = {An improved reference genome for Trifolium subterraneum L. provides insight into molecular diversity and intra-specific phylogeny.}, journal = {Frontiers in plant science}, volume = {14}, number = {}, pages = {1103857}, pmid = {36875612}, issn = {1664-462X}, abstract = {Subterranean clover (Trifolium subterraneum L., Ts) is a geocarpic, self-fertile annual forage legume with a compact diploid genome (n = x = 8, 544 Mb/1C). Its resilience and climate adaptivity have made it an economically important species in Mediterranean and temperate zones. Using the cultivar Daliak, we generated higher resolution sequence data, created a new genome assembly TSUd_3.0, and conducted molecular diversity analysis for copy number variant (CNV) and single-nucleotide polymorphism (SNP) among 36 cultivars. TSUd_3.0 substantively improves prior genome assemblies with new Hi-C and long-read sequence data, covering 531 Mb, containing 41,979 annotated genes and generating a 94.4% BUSCO score. Comparative genomic analysis among select members of the tribe Trifolieae indicated TSUd 3.0 corrects six assembly-error inversion/duplications and confirmed phylogenetic relationships. Its synteny with T. pratense, T. repens, Medicago truncatula and Lotus japonicus genomes were assessed, with the more distantly related T. repens and M. 
truncatula showing higher levels of co-linearity with Ts than between Ts and its close relative T. pratense. Resequencing of 36 cultivars discovered 7,789,537 SNPs subsequently used for genomic diversity assessment and sequence-based clustering. Heterozygosity estimates ranged from 1% to 21% within the 36 cultivars and may be influenced by admixture. Phylogenetic analysis supported subspecific genetic structure, although it indicates four or five groups, rather than the three recognized subspecies. Furthermore, there were incidences where cultivars characterized as belonging to a particular subspecies clustered with another subspecies when using genomic data. These outcomes suggest that further investigation of Ts sub-specific classification using molecular and morpho-physiological data is needed to clarify these relationships. This upgraded reference genome, complemented with comprehensive sequence diversity analysis of 36 cultivars, provides a platform for future gene functional analysis of key traits, and genome-based breeding strategies for climate adaptation and agronomic performance. 
Pangenome analysis, more in-depth intra-specific phylogenomic analysis using the Ts core collection, and functional genetic and genomic studies are needed to further augment knowledge of Trifolium genomes.}, } @article {pmid36871069, year = {2023}, author = {Nowinski, B and Feng, X and Preston, CM and Birch, JM and Luo, H and Whitman, WB and Moran, MA}, title = {Ecological divergence of syntopic marine bacterial species is shaped by gene content and expression.}, journal = {The ISME journal}, volume = {17}, number = {6}, pages = {813-822}, pmid = {36871069}, issn = {1751-7370}, mesh = {RNA, Ribosomal, 16S/genetics/analysis ; *Genes, Bacterial ; *Rhodobacteraceae/genetics ; Phytoplankton/genetics ; Genomics ; Phylogeny ; Genome, Bacterial ; Seawater/microbiology ; }, abstract = {Identifying mechanisms by which bacterial species evolve and maintain genomic diversity is particularly challenging for the uncultured lineages that dominate the surface ocean. A longitudinal analysis of bacterial genes, genomes, and transcripts during a coastal phytoplankton bloom revealed two co-occurring, highly related Rhodobacteraceae species from the deeply branching and uncultured NAC11-7 lineage. These have identical 16S rRNA gene amplicon sequences, yet their genome contents assembled from metagenomes and single cells indicate species-level divergence. Moreover, shifts in relative dominance of the species during dynamic bloom conditions over 7 weeks confirmed the syntopic species' divergent responses to the same microenvironment at the same time. Genes unique to each species and genes shared but divergent in per-cell inventories of mRNAs accounted for 5% of the species' pangenome content. These analyses uncover physiological and ecological features that differentiate the species, including capacities for organic carbon utilization, attributes of the cell surface, metal requirements, and vitamin biosynthesis. 
Such insights into the coexistence of highly related and ecologically similar bacterial species in their shared natural habitat are rare.}, } @article {pmid36864624, year = {2023}, author = {Deorowicz, S and Danek, A and Li, H}, title = {AGC: compact representation of assembled genomes with fast queries and updates.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {3}, pages = {}, pmid = {36864624}, issn = {1367-4811}, support = {U01 HG010971/HG/NHGRI NIH HHS/United States ; R01 HG010040/HG/NHGRI NIH HHS/United States ; U01 HG010961/HG/NHGRI NIH HHS/United States ; }, mesh = {Sequence Analysis, DNA ; *Genome ; *Software ; High-Throughput Nucleotide Sequencing ; }, abstract = {MOTIVATION: High-quality sequence assembly is the ultimate representation of complete genetic information of an individual. Several ongoing pangenome projects are producing collections of high-quality assemblies of various species. Each project has already generated assemblies of hundreds of gigabytes on disk, greatly impeding the distribution of and access to such rich datasets.

RESULTS: Here, we show how to reduce the size of the sequenced genomes by 2-3 orders of magnitude. Our tool compresses the genomes significantly better than the existing programs and is much faster. Moreover, its unique feature is the ability to access any contig (or its part) in a fraction of a second and easily append new samples to the compressed collections. Thanks to this, AGC could be useful not only for backup or transfer purposes but also for routine analysis of pangenome sequences in common pipelines. With the rapidly reduced cost and improved accuracy of sequencing technologies, we anticipate more comprehensive pangenome projects with much larger sample sizes. AGC is likely to become a foundation tool to store, distribute and access pangenome data.

The source code of AGC is available at https://github.com/refresh-bio/agc. The package can be installed via Bioconda at https://anaconda.org/bioconda/agc.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid36864101, year = {2023}, author = {Yan, H and Sun, M and Zhang, Z and Jin, Y and Zhang, A and Lin, C and Wu, B and He, M and Xu, B and Wang, J and Qin, P and Mendieta, JP and Nie, G and Wang, J and Jones, CS and Feng, G and Srivastava, RK and Zhang, X and Bombarely, A and Luo, D and Jin, L and Peng, Y and Wang, X and Ji, Y and Tian, S and Huang, L}, title = {Pangenomic analysis identifies structural variation associated with heat tolerance in pearl millet.}, journal = {Nature genetics}, volume = {55}, number = {3}, pages = {507-518}, pmid = {36864101}, issn = {1546-1718}, mesh = {*Pennisetum/genetics ; *Thermotolerance/genetics ; Adaptation, Physiological/genetics ; Genomics ; Gene Expression Profiling ; }, abstract = {Pearl millet is an important cereal crop worldwide and shows superior heat tolerance. Here, we developed a graph-based pan-genome by assembling ten chromosomal genomes with one existing assembly adapted to different climates worldwide and captured 424,085 genomic structural variations (SVs). Comparative genomics and transcriptomics analyses revealed the expansion of the RWP-RK transcription factor family and the involvement of endoplasmic reticulum (ER)-related genes in heat tolerance. The overexpression of one RWP-RK gene led to enhanced plant heat tolerance and transactivated ER-related genes quickly, supporting the important roles of RWP-RK transcription factors and ER system in heat tolerance. Furthermore, we found that some SVs affected the gene expression associated with heat tolerance and SVs surrounding ER-related genes shaped adaptation to heat tolerance during domestication in the population. 
Our study provides a comprehensive genomic resource revealing insights into heat tolerance and laying a foundation for generating more robust crops under the changing climate.}, } @article {pmid36854668, year = {2023}, author = {Liu, J and Dawe, RK}, title = {Large haplotypes highlight a complex age structure within the maize pan-genome.}, journal = {Genome research}, volume = {33}, number = {3}, pages = {359--370}, pmid = {36854668}, issn = {1549-5469}, mesh = {Haplotypes ; *Zea mays/genetics ; *Centromere/genetics ; Genome, Plant ; Genomics/methods ; }, abstract = {The genomes of maize and other eukaryotes contain stable haplotypes in regions of low recombination. These regions, including centromeres, long heterochromatic blocks, and rDNA arrays, have been difficult to analyze with respect to their diversity and origin. Greatly improved genome assemblies are now available that enable comparative genomics over these and other nongenic spaces. Using 26 complete maize genomes, we developed methods to align intergenic sequences while excluding genes and regulatory regions. The centromere haplotypes (cenhaps) extend for megabases on either side of the functional centromere regions and appear as evolutionary strata, with haplotype divergence/coalescence times dating as far back as 450 thousand years ago (kya). Application of the same methods to other low recombination regions (heterochromatic knobs and rDNA) and all intergenic spaces revealed that deep coalescence times are ubiquitous across the maize pan-genome. Divergence estimates vary over a broad timescale with peaks at ∼16 and 300 kya, reflecting a complex history of gene flow among diverging populations and changes in population size associated with domestication. 
Cenhaps and other long haplotypes provide vivid displays of this ancient diversity.}, } @article {pmid36853054, year = {2023}, author = {Du, Y and Zou, J and Yin, Z and Chen, T}, title = {Pan-Chromosome and Comparative Analysis of Agrobacterium fabrum Reveal Important Traits Concerning the Genetic Diversity, Evolutionary Dynamics, and Niche Adaptation of the Species.}, journal = {Microbiology spectrum}, volume = {11}, number = {2}, pages = {e0292422}, pmid = {36853054}, issn = {2165-0497}, abstract = {Agrobacterium fabrum has been critical for the development of plant genetic engineering and agricultural biotechnology due to its ability to transform eukaryotic cells. However, the gene composition, evolutionary dynamics, and niche adaptation of this species is still unknown. Therefore, we established a comparative genomic analysis based on a pan-chromosome data set to evaluate the genetic diversity of A. fabrum. Here, 25 A. fabrum genomes were selected for analysis by core genome phylogeny combined with the average nucleotide identity (ANI), amino acid identity (AAI), and in silico DNA-DNA hybridization (DDH) values. An open pan-genome of A. fabrum exhibits genetic diversity with variable accessorial genes as evidenced by a consensus pan-genome of 12 representative genomes. The genomic plasticity of A. fabrum is apparent in its putative sequences for mobile genetic elements (MGEs), limited horizontal gene transfer barriers, and potentially horizontally transferred genes. The evolutionary constraints and functional enrichment in the pan-chromosome were measured by the Clusters of Orthologous Groups (COG) categories using eggNOG-mapper software, and the nonsynonymous/synonymous rate ratio (dN/dS) was determined using HYPHY software. Comparative analysis revealed significant differences in the functional enrichment and the degree of purifying selection between the core genome and non-core genome. 
We demonstrate that the core gene families undergo stronger purifying selection but have a significant bias to contain one or more positively selected sites. Furthermore, although they shared similar genetic diversity, we observed significant differences between chromosome 1 (Chr I) and the chromid in their functional features and evolutionary constraints. We demonstrate that putative genetic elements responsible for plant infection, ecological adaptation, and speciation represent the core genome, highlighting their importance in the adaptation of A. fabrum to plant-related niches. Our pan-chromosome analysis of A. fabrum provides comprehensive insights into the genetic properties, evolutionary patterns, and niche adaptation of the species. IMPORTANCE Agrobacterium spp. live in diverse plant-associated niches such as soil, the rhizosphere, and vegetation, which are challenged by multiple stressors such as diverse energy sources, plant defenses, and microbial competition. They have evolved the ability to utilize diverse resources, escape plant defenses, and defeat competitors. However, the underlying genetic diversity and evolutionary dynamics of Agrobacterium spp. remain unexplored. We examined the phylogeny and pan-genome of A. fabrum to define intraspecies evolutionary relationships. Our results indicate an open pan-genome and numerous MGEs and horizontally transferred genes among A. fabrum genomes, reflecting the flexibility of the chromosomes and the potential for genetic exchange. 
Furthermore, we observed significant differences in the functional features and evolutionary constraints between the core and accessory genomes and between Chr I and the chromid, respectively.}, } @article {pmid36852268, year = {2023}, author = {Jiang, YF and Wang, S and Wang, CL and Xu, RH and Wang, WW and Jiang, Y and Wang, MS and Jiang, L and Dai, LH and Wang, JR and Chu, XH and Zeng, YQ and Fang, LZ and Wu, DD and Zhang, Q and Ding, XD}, title = {Pangenome obtained by long-read sequencing of 11 genomes reveal hidden functional structural variants in pigs.}, journal = {iScience}, volume = {26}, number = {3}, pages = {106119}, pmid = {36852268}, issn = {2589-0042}, abstract = {Long-read sequencing (LRS) facilitates both the genome assembly and the discovery of structural variants (SVs). Here, we built a graph-based pig pangenome by incorporating 11 LRS genomes with an average of 94.01% BUSCO completeness score, revealing 206-Mb novel sequences. We discovered 183,352 nonredundant SVs (63% novel), representing 12.12% of the reference genome. By genotyping SVs in an additional 196 short-read sequencing samples, we identified thousands of population stratified SVs. Particularly, we detected 7,568 Tibetan specific SVs, some of which demonstrate significant population differentiation between Tibetan and low-altitude pigs, which might be associated with the high-altitude hypoxia adaptation in Tibetan pigs. Further integrating functional genomic data, the most promising candidate genes within the SVs that might contribute to the high-altitude hypoxia adaptation were discovered. 
Overall, our study generates a benchmark pangenome resource for illustrating the important roles of SVs in adaptive evolution, domestication, and genetic improvement of agronomic traits in pigs.}, } @article {pmid36851839, year = {2023}, author = {Dallinger, HG and Löschenberger, F and Azrak, N and Ametz, C and Michel, S and Bürstmayr, H}, title = {Genome-wide association mapping for pre-harvest sprouting in European winter wheat detects novel resistance QTL, pleiotropic effects, and structural variation in multiple genomes.}, journal = {The plant genome}, volume = {}, number = {}, pages = {e20301}, doi = {10.1002/tpg2.20301}, pmid = {36851839}, issn = {1940-3372}, support = {BeyondEuropeprojectCAWINT : 855737//Österreichische Forschungsförderungsgesellschaft/ ; }, abstract = {Pre-harvest sprouting (PHS), germination of seeds before harvest, is a major problem in global wheat (Triticum aestivum L.) production, and leads to reduced bread-making quality in affected grain. Breeding for PHS resistance can prevent losses under adverse conditions. Selecting resistant lines in years lacking pre-harvest rain, requires challenging of plants in the field or in the laboratory or using genetic markers. Despite the availability of a wheat reference and pan-genome, linking markers, genes, allelic, and structural variation, a complete understanding of the mechanisms underlying various sources of PHS resistance is still lacking. Therefore, we challenged a population of European wheat varieties and breeding lines with PHS conditions and phenotyped them for PHS traits, grain quality, phenological and agronomic traits to conduct genome-wide association mapping. Furthermore, we compared these marker-trait associations to previously reported PHS loci and evaluated their usefulness for breeding. We found markers associated with PHS on all chromosomes, with strong evidence for novel quantitative trait locus/loci (QTL) on chromosome 1A and 5B. 
The QTL on chromosome 1A lacks pleiotropic effect, for the QTL on 5B we detected pleiotropic effects on phenology and grain quality. Multiple peaks on chromosome 4A co-located with the major resistance locus Phs-A1, for which two causal genes, TaPM19 and TaMKK3, have been proposed. Mapping markers and genes to the pan-genome and chromosomal alignments provide evidence for structural variation around this major PHS-resistance locus. Although PHS is controlled by many loci distributed across the wheat genome, Phs-A1 on chromosome 4A seems to be the most effective and widely deployed source of resistance, in European wheat varieties.}, } @article {pmid36851180, year = {2023}, author = {Chandrasekar, SS and Kingstad-Bakke, BA and Wu, CW and Phanse, Y and Osorio, JE and Talaat, AM}, title = {A DNA Prime and MVA Boost Strategy Provides a Robust Immunity against Infectious Bronchitis Virus in Chickens.}, journal = {Vaccines}, volume = {11}, number = {2}, pages = {}, pmid = {36851180}, issn = {2076-393X}, support = {2020-67021-31256//United States Department of Agriculture/ ; //Wisconsin Alumni Research Foundation/ ; }, abstract = {Infectious bronchitis (IB) is an acute respiratory disease of chickens caused by the avian coronavirus Infectious Bronchitis Virus (IBV). Modified Live Virus (MLV) vaccines used commercially can revert to virulence in the field, recombine with circulating serotypes, and cause tissue damage in vaccinated birds. Previously, we showed that a mucosal adjuvant system, QuilA-loaded Chitosan (QAC) nanoparticles encapsulating plasmid vaccine encoding for IBV nucleocapsid (N), is protective against IBV. Herein, we report a heterologous vaccination strategy against IBV, where QAC-encapsulated plasmid immunization is followed by Modified Vaccinia Ankara (MVA) immunization, both expressing the same IBV-N antigen. This strategy led to the initiation of robust T-cell responses. 
Birds immunized with the heterologous vaccine strategy had reduced clinical severity and >two-fold reduction in viral burden in lachrymal fluid and tracheal swabs post-challenge compared to priming and boosting with the MVA-vectored vaccine alone. The outcomes of this study indicate that the heterologous vaccine platform is more immunogenic and protective than a homologous MVA prime/boost vaccination strategy.}, } @article {pmid36848567, year = {2023}, author = {Sierra-Patev, S and Min, B and Naranjo-Ortiz, M and Looney, B and Konkel, Z and Slot, JC and Sakamoto, Y and Steenwyk, JL and Rokas, A and Carro, J and Camarero, S and Ferreira, P and Molpeceres, G and Ruiz-Dueñas, FJ and Serrano, A and Henrissat, B and Drula, E and Hughes, KW and Mata, JL and Ishikawa, NK and Vargas-Isla, R and Ushijima, S and Smith, CA and Donoghue, J and Ahrendt, S and Andreopoulos, W and He, G and LaButti, K and Lipzen, A and Ng, V and Riley, R and Sandor, L and Barry, K and Martínez, AT and Xiao, Y and Gibbons, JG and Terashima, K and Grigoriev, IV and Hibbett, D}, title = {A global phylogenomic analysis of the shiitake genus Lentinula.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {10}, pages = {e2214076120}, pmid = {36848567}, issn = {1091-6490}, support = {/HHMI/Howard Hughes Medical Institute/United States ; R56 AI146096/AI/NIAID NIH HHS/United States ; }, mesh = {*Lentinula ; Phylogeny ; Asia, Eastern ; Thailand ; }, abstract = {Lentinula is a broadly distributed group of fungi that contains the cultivated shiitake mushroom, L. edodes. We sequenced 24 genomes representing eight described species and several unnamed lineages of Lentinula from 15 countries on four continents. Lentinula comprises four major clades that arose in the Oligocene, three in the Americas and one in Asia-Australasia. To expand sampling of shiitake mushrooms, we assembled 60 genomes of L. 
edodes from China that were previously published as raw Illumina reads and added them to our dataset. Lentinula edodes sensu lato (s. lat.) contains three lineages that may warrant recognition as species, one including a single isolate from Nepal that is the sister group to the rest of L. edodes s. lat., a second with 20 cultivars and 12 wild isolates from China, Japan, Korea, and the Russian Far East, and a third with 28 wild isolates from China, Thailand, and Vietnam. Two additional lineages in China have arisen by hybridization among the second and third groups. Genes encoding cysteine sulfoxide lyase (lecsl) and γ-glutamyl transpeptidase (leggt), which are implicated in biosynthesis of the organosulfur flavor compound lenthionine, have diversified in Lentinula. Paralogs of both genes that are unique to Lentinula (lecsl 3 and leggt 5b) are coordinately up-regulated in fruiting bodies of L. edodes. The pangenome of L. edodes s. lat. contains 20,308 groups of orthologous genes, but only 6,438 orthogroups (32%) are shared among all strains, whereas 3,444 orthogroups (17%) are found only in wild populations, which should be targeted for conservation.}, } @article {pmid36844929, year = {2023}, author = {Nielsen, FD and Møller-Jensen, J and Jørgensen, MG}, title = {Adding context to the pneumococcal core genes using bioinformatic analysis of the intergenic pangenome of Streptococcus pneumoniae.}, journal = {Frontiers in bioinformatics}, volume = {3}, number = {}, pages = {1074212}, pmid = {36844929}, issn = {2673-7647}, abstract = {Introduction: Whole genome sequencing offers great opportunities for linking genotypes to phenotypes aiding in our understanding of human disease and bacterial pathogenicity. However, these analyses often overlook non-coding intergenic regions (IGRs). By disregarding the IGRs, crucial information is lost, as genes have little biological function without expression. 
Methods/Results: In this study, we present the first complete pangenome of the important human pathogen Streptococcus pneumoniae (pneumococcus), spanning both the genes and IGRs. We show that the pneumococcus species retains a small core genome of IGRs that are present across all isolates. Gene expression is highly dependent on these core IGRs, and often several copies of these core IGRs are found across each genome. Core genes and core IGRs show a clear linkage as 81% of core genes are associated with core IGRs. Additionally, we identify a single IGR within the core genome that is always occupied by one of two highly distinct sequences, scattered across the phylogenetic tree. Discussion: Their distribution indicates that this IGR is transferred between isolates through horizontal regulatory transfer independent of the flanking genes and that each type likely serves different regulatory roles depending on their genetic context.}, } @article {pmid36838392, year = {2023}, author = {Sugrue, I and Hill, D and O'Connor, PM and Day, L and Stanton, C and Hill, C and Ross, RP}, title = {Nisin E Is a Novel Nisin Variant Produced by Multiple Streptococcus equinus Strains.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, pmid = {36838392}, issn = {2076-2607}, support = {SFI/12/RC/2273//Science Foundation Ireland/Ireland ; }, abstract = {Nisin A, the prototypical lantibiotic, is an antimicrobial peptide currently utilised as a food preservative, with potential for therapeutic applications. Here, we describe nisin E, a novel nisin variant produced by two Streptococcus equinus strains, APC4007 and APC4008, isolated from sheep milk. Shotgun whole genome sequencing and analysis revealed biosynthetic gene clusters similar to nisin U, with a unique rearrangement of the core peptide encoding gene within the cluster. 
The 3100.8 Da peptide by MALDI-TOF mass spectrometry, is 75% identical to nisin A, with 10 differences, including 2 deletions: Ser29 and Ile30, and 8 substitutions: Ile4Lys, Gly18Thr, Asn20Pro, Met21Ile, His27Gly, Val32Phe, Ser33Gly, and Lys34Asn. Nisin E producing strains inhibited species of Lactobacillus, Bacillus, and Clostridioides and were immune to nisin U. Sequence alignment identified putative promoter sequences across the nisin producer genera, allowing for the prediction of genes in Streptococcus to be potentially regulated by nisin. S. equinus pangenome BLAST analyses detected 6 nisin E operons across 44 publicly available genomes. An additional 20 genomes contained a subset of nisin E transport/immunity and regulatory genes (nseFEGRK), without adjacent peptide production genes. These genes suggest that nisin E response mechanisms, distinct from the canonical nisin immunity and resistance operons, are widespread across the S. equinus species. The discovery of this new nisin variant and its immunity determinants in S. equinus suggests a central role for nisin in the competitive nature of the species.}, } @article {pmid36838372, year = {2023}, author = {Jiang, S and Fan, Q and Zhang, Z and Deng, Y and Wang, L and Dai, Q and Wang, J and Lin, M and Zhou, J and Long, Z and He, G and Zhou, Z}, title = {Biodegradation of Oil by a Newly Isolated Strain Acinetobacter junii WCO-9 and Its Comparative Pan-Genome Analysis.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, pmid = {36838372}, issn = {2076-2607}, support = {2018YFA0901000, 2018YFA0901003//National Key R&D Program of China/ ; NJ202201//Open Fund of Key Laboratory in Luzhou/ ; }, abstract = {Waste oil pollution and the treatment of oily waste present a challenge, and the exploitation of microbial resources is a safe and efficient method to resolve these problems. 
Lipase-producing microorganisms can directly degrade waste oil and promote the degradation of oily waste and, therefore, have very significant research and application value. The isolation of efficient oil-degrading strains is of great practical significance in research into microbial remediation in oil-contaminated environments and for the enrichment of the microbial lipase resource library. In this study, Acinetobacter junii WCO-9, an efficient oil-degrading bacterium, was isolated from an oil-contaminated soil using olive oil as the sole carbon source, and its enzyme activity of ρ-nitrophenyl decanoate (ρ-NPD) decomposition was 3000 U/L. The WCO-9 strain could degrade a variety of edible oils, and its degradation capability was significantly better than that of the control strain, A. junii ATCC 17908. Comparative pan-genome and lipid degradation pathway analyses indicated that A. junii isolated from the same environment shared a similar set of core genes and that the species accumulated more specific genes that facilitated resistance to environmental stresses under different environmental conditions. WCO-9 has accumulated a complete set of oil metabolism genes under a long-term oil-contamination environment, and the compact arrangement of abundant lipase and lipase chaperones has further strengthened the ability of the strain to survive in such environments. This is the main reason why WCO-9 is able to degrade oil significantly more effectively than ATCC 17908. In addition, WCO-9 possesses a specific lipase that is not found in homologous strains. In summary, A. 
junii WCO-9, with a complete triglyceride degradation pathway and the specific lipase gene, has great potential in environmental remediation and lipase for industry.}, } @article {pmid36838305, year = {2023}, author = {Gonçalves-Oliveira, J and Gutierrez, R and Schlesener, CL and Jaffe, DA and Aguilar-Setién, A and Boulouis, HJ and Nachum-Biala, Y and Huang, BC and Weimer, BC and Chomel, BB and Harrus, S}, title = {Genomic Characterization of Three Novel Bartonella Strains in a Rodent and Two Bat Species from Mexico.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, pmid = {36838305}, issn = {2076-2607}, abstract = {Rodents and bats are the most diverse mammal group that host Bartonella species. In the Americas, they were described as harboring Bartonella species; however, they were mostly characterized to the genotypic level. We describe here Bartonella isolates obtained from blood samples of one rodent (Peromyscus yucatanicus from San José Pibtuch, Yucatan) and two bat species (Desmodus rotundus from Progreso, and Pteronotus parnellii from Chamela-Cuitzmala) from Mexico. We sequenced and described the genomic features of three Bartonella strains and performed phylogenomic and pangenome analyses to decipher their phylogenetic relationships. The mouse-associated genome was closely related to Bartonella vinsonii. The two bat-associated genomes clustered into a single distinct clade in between lineages 3 and 4, suggesting to be an ancestor of the rodent-associated Bartonella clade (lineage 4). These three genomes showed <95% OrthoANI values compared to any other Bartonella genome, and therefore should be considered as novel species. In addition, our analyses suggest that the B. vinsonii complex should be revised, and all B. vinsonii subspecies need to be renamed and considered as full species. 
The phylogenomic clustering of the bat-associated Bartonella strains and their virulence factor profile (lack of the Vbh/TraG conjugation system remains of the T4SS) suggest that it should be considered as a new lineage clade (L5) within the Bartonella genus.}, } @article {pmid36838222, year = {2023}, author = {Mughal, SR and Niazi, SA and Do, T and Gilbert, SC and Didelot, X and Radford, DR and Beighton, D}, title = {Genomic Diversity among Actinomyces naeslundii Strains and Closely Related Species.}, journal = {Microorganisms}, volume = {11}, number = {2}, pages = {}, pmid = {36838222}, issn = {2076-2607}, abstract = {The aim of this study was to investigate and clarify the ambiguous taxonomy of Actinomyces naeslundii and its closely related species using state-of-the-art high-throughput sequencing techniques, and, furthermore, to determine whether sub-clusters identified within Actinomyces oris and Actinomyces naeslundii in a previous study by multi locus sequence typing (MLST) using concatenation of seven housekeeping genes should either be classified as subspecies or distinct species. The strains in this study were broadly classified under Actinomyces naeslundii group as A. naeslundii genospecies I and genospecies II. Based on MLST data analysis, these were further classified as A. oris and A. naeslundii. The whole genome sequencing of selected strains of A. oris (n = 17) and A. naeslundii (n = 19) was carried out using Illumina Genome Analyzer IIxe and Roche 454 allowing paired-end and single-reads sequencing, respectively. The sequences obtained were aligned using CLC Genomic workbench version 5.1 and annotated using RAST (Rapid Annotation using Subsystem Technology) release version 59 accessible online. Additionally, genomes of seven publicly available strains of Actinomyces (k20, MG1, c505, OT175, OT171, OT170, and A. johnsonii) were also included. 
Comparative genomic analysis (CGA) using Mauve, Progressive Mauve, gene-by-gene, Core, and Pan Genome, and finally Digital DNA-DNA homology (DDH) analysis was carried out. DDH values were obtained using in silico genome-genome comparison. Evolutionary analysis using ClonalFrame was also undertaken. The mutation and recombination events were compared using chi-square test among A. oris and A. naeslundii isolates (analysis methods are not included in the study). CGA results were consistent with previous traditional classification using MLST. It was found that strains of Actinomyces k20, MG1, c505, and OT175 clustered in A. oris group of isolates, while OT171, OT170, and A. johnsonii appeared as separate branches. Similar clustering to MLST was observed for other isolates. The mutation and recombination events were significantly higher in A. oris than A. naeslundii, highlighting the diversity of A. oris strains in the oral cavity. These findings suggest that A. oris forms six distinct groups, whereas A. naeslundii forms three. The correct designation of isolates will help in the identification of clinical Actinomyces isolates found in dental plaque. Easily accessible online genomic sequence data will also accelerate the investigation of the biochemical characterisation and pathogenesis of this important group of micro-organisms.}, } @article {pmid36836896, year = {2023}, author = {Jalal, K and Khan, K and Hayat, A and Alnasser, SM and Meshal, A and Basharat, Z}, title = {Pan-Genomics of Escherichia albertii for Antibiotic Resistance Profiling in Different Genome Fractions and Natural Product Mediated Intervention: In Silico Approach.}, journal = {Life (Basel, Switzerland)}, volume = {13}, number = {2}, pages = {}, pmid = {36836896}, issn = {2075-1729}, abstract = {Escherichia albertii is an emerging, enteric pathogen of significance. It was first isolated in 2003 from a pediatric diarrheal sample from Bangladesh. 
In this study, a comprehensive in silico strategy was followed to first list out antibiotic-resistant genes from core, accessory and unique genome fractions of 95 available genomes of E. albertii. Then, 56 drug targets were identified from the core essential genome. Finally, ZipA, an essential cell division protein that stabilizes the FtsZ protofilaments by cross-linking them and serves as a cytoplasmic membrane anchor for the Z ring, was selected for further downstream processing. It was computationally modeled using a threading approach, followed by virtual screening of two phytochemical libraries, Ayurvedic (n = 2103 compounds) and Traditional Chinese Medicine (n = 36,043 compounds). ADMET profiling, followed by PBPK modeling in the central body compartment, in a population of 250 non-diseased, 250 cirrhotic and 250 renally impaired people was attempted. ZINC85624912 from Chinese medicinal library showed the highest bioavailability and plasma retention. This is the first attempt to simulate the fate of natural products in the body through PBPK. Dynamics simulation of 20 ns for the top three compounds from both libraries was also performed to validate the stability of the compounds. The obtained information from the current study could aid wet-lab scientists to work on the scaffold of screened drug-like compounds from natural resources and could be useful in our quest for therapy against antibiotic-resistant E. 
albertii.}, } @article {pmid36835570, year = {2023}, author = {Balabanova, L and Nedashkovskaya, O and Otstavnykh, N and Isaeva, M and Kolpakova, O and Pentehina, I and Seitkalieva, A and Noskova, Y and Stepochkina, V and Son, O and Tekutyeva, L}, title = {Computational Insight into Intraspecies Distinctions in Pseudoalteromonas distincta: Carotenoid-like Synthesis Traits and Genomic Heterogeneity.}, journal = {International journal of molecular sciences}, volume = {24}, number = {4}, pages = {}, pmid = {36835570}, issn = {1422-0067}, support = {075-11-2021-065//the Ministry of Science and Higher Education of the Russian Federation within the framework of Decree of the Government of the Russian Federation № 218/ ; }, mesh = {*Pseudoalteromonas/genetics ; Genomics ; Carotenoids/metabolism ; Glycosylation ; Phenotype ; Phylogeny ; }, abstract = {Advances in the computational annotation of genomes and the predictive potential of current metabolic models, based on more than thousands of experimental phenotypes, allow them to be applied to identify the diversity of metabolic pathways at the level of ecophysiology differentiation within taxa and to predict phenotypes, secondary metabolites, host-associated interactions, survivability, and biochemical productivity under proposed environmental conditions. The significantly distinctive phenotypes of members of the marine bacterial species Pseudoalteromonas distincta and an inability to use common molecular markers make their identification within the genus Pseudoalteromonas and prediction of their biotechnology potential impossible without genome-scale analysis and metabolic reconstruction. A new strain, KMM 6257, of a carotenoid-like phenotype, isolated from a deep-habituating starfish, emended the description of P. distincta, particularly in the temperature growth range from 4 to 37 °C. The taxonomic status of all available closely related species was elucidated by phylogenomics. P. 
distincta possesses putative methylerythritol phosphate pathway II and 4,4'-diapolycopenedioate biosynthesis, related to C30 carotenoids, and their functional analogues, aryl polyene biosynthetic gene clusters (BGC). However, the yellow-orange pigmentation phenotypes in some strains coincide with the presence of a hybrid BGC encoding for aryl polyene esterified with resorcinol. The alginate degradation and glycosylated immunosuppressant production, similar to brasilicardin, streptorubin, and nucleocidines, are the common predicted features. Starch, agar, carrageenan, xylose, lignin-derived compound degradation, polysaccharide, folate, and cobalamin biosynthesis are all strain-specific.}, } @article {pmid36834516, year = {2023}, author = {Li, H and Tahir Ul Qamar, M and Yang, L and Liang, J and You, J and Wang, L}, title = {Current Progress, Applications and Challenges of Multi-Omics Approaches in Sesame Genetic Improvement.}, journal = {International journal of molecular sciences}, volume = {24}, number = {4}, pages = {}, pmid = {36834516}, issn = {1422-0067}, support = {CAAS-ASTIP-2016-OCRI//the Agricultural Science and Technology Innovation Project of the Chinese Academy of Agricultural Sciences/ ; 2020BBA045//the Key Research Projects of Hubei province/ ; 2021-620-000-001-035//the Science and Technology Innovation Project of Hubei province/ ; Y2022XK11//the Fundamental Research Funds for Central Non-profit Scientific Institution/ ; KF2022002//the Open Project of Key Laboratory of Biology and Genetic Improvement of Oil Crops, Ministry of Agriculture and Rural Affairs, P.R. 
China/ ; CARS-14//China Agriculture Research System/ ; 2022020801020299//the Knowledge Innovation Program of Wuhan-Shuguang Project/ ; 1610172022010//the Central Public-interest Scientific Institution Basal Research Fund/ ; }, mesh = {*Sesamum/genetics ; Multiomics ; Plant Breeding ; Genomics/methods ; Proteomics/methods ; }, abstract = {Sesame is one of the important traditional oil crops in the world, and has high economic and nutritional value. Recently, due to the novel high throughput sequencing techniques and bioinformatical methods, the study of the genomics, methylomics, transcriptomics, proteomics and metabonomics of sesame has developed rapidly. Thus far, the genomes of five sesame accessions have been released, including white and black seed sesame. The genome studies reveal the function and structure of the sesame genome, and facilitate the exploitation of molecular markers, the construction of genetic maps and the study of pan-genomes. Methylomics focus on the study of the molecular level changes under different environmental conditions. Transcriptomics provide a powerful tool to study abiotic/biotic stress, organ development, and noncoding RNAs, and proteomics and metabonomics also provide some support in studying abiotic stress and important traits. In addition, the opportunities and challenges of multi-omics in sesame genetics breeding were also described. 
This review summarizes the current research status of sesame from the perspectives of multi-omics and hopes to provide help for further in-depth research on sesame.}, } @article {pmid36833201, year = {2023}, author = {Liu, S and Jiao, J and Tian, CF}, title = {Adaptive Evolution of Rhizobial Symbiosis beyond Horizontal Gene Transfer: From Genome Innovation to Regulation Reconstruction.}, journal = {Genes}, volume = {14}, number = {2}, pages = {}, pmid = {36833201}, issn = {2073-4425}, mesh = {*Rhizobium/genetics ; Symbiosis/genetics ; Gene Transfer, Horizontal ; Ecosystem ; Nitrogen Fixation/genetics ; *Fabaceae/microbiology ; }, abstract = {There are ubiquitous variations in symbiotic performance of different rhizobial strains associated with the same legume host in agricultural practices. This is due to polymorphisms of symbiosis genes and/or largely unexplored variations in integration efficiency of symbiotic function. Here, we reviewed cumulative evidence on integration mechanisms of symbiosis genes. Experimental evolution, in concert with reverse genetic studies based on pangenomics, suggests that gain of the same circuit of key symbiosis genes through horizontal gene transfer is necessary but sometimes insufficient for bacteria to establish an effective symbiosis with legumes. An intact genomic background of the recipient may not support the proper expression or functioning of newly acquired key symbiosis genes. Further adaptive evolution, through genome innovation and reconstruction of regulation networks, may confer the recipient of nascent nodulation and nitrogen fixation ability. Other accessory genes, either co-transferred with key symbiosis genes or stochastically transferred, may provide the recipient with additional adaptability in ever-fluctuating host and soil niches. 
Successful integrations of these accessory genes with the rewired core network, regarding both symbiotic and edaphic fitness, can optimize symbiotic efficiency in various natural and agricultural ecosystems. This progress also sheds light on the development of elite rhizobial inoculants using synthetic biology procedures.}, } @article {pmid36831244, year = {2023}, author = {Apicella, C and Ruano, CSM and Thilaganathan, B and Khalil, A and Giorgione, V and Gascoin, G and Marcellin, L and Gaspar, C and Jacques, S and Murdoch, CE and Miralles, F and Méhats, C and Vaiman, D}, title = {Pan-Genomic Regulation of Gene Expression in Normal and Pathological Human Placentas.}, journal = {Cells}, volume = {12}, number = {4}, pages = {}, pmid = {36831244}, issn = {2073-4409}, mesh = {Humans ; Pregnancy ; Female ; *Placenta/metabolism ; *Trophoblasts/metabolism ; Transcriptome ; Gene Expression Regulation ; Genomics ; }, abstract = {In this study, we attempted to find genetic variants affecting gene expression (eQTL = expression Quantitative Trait Loci) in the human placenta in normal and pathological situations. The analysis of gene expression in placental diseases (Pre-eclampsia and Intra-Uterine Growth Restriction) is hindered by the fact that diseased placental tissue samples are generally taken at earlier gestations compared to control samples. The difference in gestational age is considered a major confounding factor in the transcriptome regulation of the placenta. To alleviate this significant problem, we propose here a novel approach to pinpoint disease-specific cis-eQTLs. By statistical correction for gestational age at sampling as well as other confounding/surrogate variables systematically searched and identified, we found 43 e-genes for which proximal SNPs influence expression level. 
Then, we performed the analysis again, removing the disease status from the covariates, and we identified 54 e-genes, 16 of which are identified de novo and, thus, possibly related to placental disease. We found a highly significant overlap with previous studies for the list of 43 e-genes, validating our methodology and findings. Among the 16 disease-specific e-genes, several are intrinsic to trophoblast biology and, therefore, constitute novel targets of interest to better characterize placental pathology and its varied clinical consequences. The approach that we used may also be applied to the study of other human diseases where confounding factors have hampered a better understanding of the pathology.}, } @article {pmid36830307, year = {2023}, author = {Liu, H and Liu, X and He, J and Zhang, L and Zhao, F and Zhou, Z and Hua, X and Yu, Y}, title = {Emergence and Evolution of OXA-23-Producing ST46Pas-ST462Oxf-KL28-OCL1 Carbapenem-Resistant Acinetobacter baumannii Mediated by a Novel ISAba1-Based Tn7534 Transposon.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {12}, number = {2}, pages = {}, pmid = {36830307}, issn = {2079-6382}, support = {2018YFE0102100//National Key Research and Development Program of China grant/ ; 81861138054//National Natural Science Foundation of China/ ; 82072313//National Natural Science Foundation of China/ ; }, abstract = {Carbapenem-resistant Acinetobacter baumannii (CRAB) isolates of global clone 1 (GC1) and global clone 2 (GC2) have been widely reported. Nevertheless, non-GC1 and non-GC2 CRAB strains have been studied less. In particular, no reports concerning sequence type 46 (ST46Pas) CRAB strains have been described thus far. In this work, the genomic features and possible evolution mechanism of ST46Pas OXA-23-producing CRAB isolates from clinical specimens are reported for the first time. 
Antimicrobial susceptibility testing of three ST46Pas strains revealed identical resistance profiles (resistance to imipenem, meropenem, ciprofloxacin and the combination of cefoperazone/sulbactam at a 2:1 ratio). They were found to belong to ST46Pas and ST462Oxf with capsular polysaccharide 28 (KL28) and lipooligosaccharide 1 (OCL1), respectively. Whole-genome sequencing (WGS) revealed that all contained one copy of chromosomal blaOXA-23, which was located in a novel ISAba1-based Tn7534 composite transposon. In particular, another copy of the Tn7534 composite transposon was identified in an Hgz_103-type plasmid with 9 bp target site duplications (TSDs, ACAACATGC) in the A. baumannii ZHOU strain. As the strains originated from two neighboring intensive care units (ICUs), ST46Pas OXA-23-producing CRAB strains may have evolved via transposition events or a pdif module. Based on the GenBank database, ST46Pas strains were collected from various sources; however, most were collected in Hangzhou (China) from 2014 to 2021. Pan-genome analysis revealed 3276 core genes, 0 soft-core genes, 768 shell genes and 443 cloud genes shared among all ST46Pas strains. 
In conclusion, the emergence of ST46Pas CRAB strains might present a new threat to healthcare settings; therefore, effective surveillance is required to prevent further dissemination.}, } @article {pmid36828537, year = {2023}, author = {Parker, K and Wood, H and Russell, JA and Yarmosh, D and Shteyman, A and Bagnoli, J and Knight, B and Aspinwall, JR and Jacobs, J and Werking, K and Winegar, R}, title = {Development and Optimization of an Unbiased, Metagenomics-Based Pathogen Detection Workflow for Infectious Disease and Biosurveillance Applications.}, journal = {Tropical medicine and infectious disease}, volume = {8}, number = {2}, pages = {}, pmid = {36828537}, issn = {2414-6366}, support = {HDTRA1-15-C-0013//the Defense Threat Reduction Agency-Joint Science and Technology Office for Chemical and Biological Defense/ ; }, abstract = {Rapid, specific, and sensitive identification of microbial pathogens is critical to infectious disease diagnosis and surveillance. Classical culture-based methods can be applied to a broad range of pathogens but have long turnaround times. Molecular methods, such as PCR, are time-effective but are not comprehensive and may not detect novel strains. Metagenomic shotgun next-generation sequencing (NGS) promises specific identification and characterization of any pathogen (viruses, bacteria, fungi, and protozoa) in a less biased way. Despite its great potential, NGS has yet to be widely adopted by clinical microbiology laboratories due in part to the absence of standardized workflows. Here, we describe a sample-to-answer workflow called PanGIA (Pan-Genomics for Infectious Agents) that includes simplified, standardized wet-lab procedures and data analysis with an easy-to-use bioinformatics tool. PanGIA is an end-to-end, multi-use workflow that can be used for pathogen detection and related applications, such as biosurveillance and biothreat detection. 
We performed a comprehensive survey and assessment of current, commercially available wet-lab technologies and open-source bioinformatics tools for each workflow component. The workflow includes total nucleic acid extraction from clinical human whole blood and environmental microbial forensic swabs as sample inputs, host nucleic acid depletion, dual DNA and RNA library preparation, shotgun sequencing on an Illumina MiSeq, and sequencing data analysis. The PanGIA workflow can be completed within 24 h and is currently compatible with bacteria and viruses. Here, we present data from the development and application of the clinical and environmental workflows, enabling the specific detection of pathogens associated with bloodstream infections and environmental biosurveillance, without the need for targeted assay development.}, } @article {pmid36824763, year = {2023}, author = {Joubert, PM and Krasileva, KV}, title = {Distinct genomic contexts predict gene presence-absence variation in different pathotypes of a fungal plant pathogen.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {36824763}, support = {DP2 AT011967/AT/NCCIH NIH HHS/United States ; }, abstract = {BACKGROUND: Fungi use the accessory segments of their pan-genomes to adapt to their environments. While gene presence-absence variation (PAV) contributes to shaping these accessory gene reservoirs, whether these events happen in specific genomic contexts remains unclear. Additionally, since pan-genome studies often group together all members of the same species, it is uncertain whether genomic or epigenomic features shaping pan-genome evolution are consistent across populations within the same species. Fungal plant pathogens are useful models for answering these questions because members of the same species often infect distinct hosts, and they frequently rely on gene PAV to adapt to these hosts.

RESULTS: We analyzed gene PAV in the rice and wheat blast fungus, Magnaporthe oryzae, and found that PAV of disease-causing effectors, antibiotic production, and non-self-recognition genes may drive the adaptation of the fungus to its environment. We then analyzed genomic and epigenomic features and data from available datasets for patterns that might help explain these PAV events. We observed that proximity to transposable elements (TEs), gene GC content, gene length, expression level in the host, and histone H3K27me3 marks were different between PAV genes and conserved genes, among other features. We used these features to construct a random forest classifier that was able to predict whether a gene is likely to experience PAV with high precision (86.06%) and recall (92.88%) in rice-infecting M. oryzae. Finally, we found that PAV in wheat- and rice-infecting pathotypes of M. oryzae differed in their number and their genomic context.

CONCLUSIONS: Our results suggest that genomic and epigenomic features of gene PAV can be used to better understand and even predict fungal pan-genome evolution. We also show that substantial intra-species variation can exist in these features.}, } @article {pmid36824272, year = {2023}, author = {Gao, Y and Xu, J and Li, Z and Zhang, Y and Riera, N and Xiong, Z and Ouyang, Z and Liu, X and Lu, Z and Seymour, D and Zhong, B and Wang, N}, title = {Citrus genomic resources unravel putative genetic determinants of Huanglongbing pathogenicity.}, journal = {iScience}, volume = {26}, number = {2}, pages = {106024}, pmid = {36824272}, issn = {2589-0042}, abstract = {Citrus HLB caused by Candidatus Liberibacter asiaticus is a pathogen-triggered immune disease. Here, we identified putative genetic determinants of HLB pathogenicity by integrating citrus genomic resources to characterize the pan-genome of accessions that differ in their response to HLB. Genome-wide association mapping and analysis of allele-specific expression between susceptible, tolerant, and resistant accessions further refined candidates underlying the response to HLB. We first developed a phased diploid assembly of Citrus sinensis 'Newhall' genome and produced resequencing data for 91 citrus accessions that differ in their response to HLB. These data were combined with previous resequencing data from 356 accessions for genome-wide association mapping of the HLB response. Genes determinants for HLB pathogenicity were associated with host immune response, ROS production, and antioxidants. 
Overall, this study has provided a significant resource of citrus genomic data and identified candidate genes to be further explored to understand the genetic determinants of HLB pathogenicity.}, } @article {pmid36823453, year = {2023}, author = {Webb, EA and Held, NA and Zhao, Y and Graham, ED and Conover, AE and Semones, J and Lee, MD and Feng, Y and Fu, FX and Saito, MA and Hutchins, DA}, title = {Importance of mobile genetic element immunity in numerically abundant Trichodesmium clades.}, journal = {ISME communications}, volume = {3}, number = {1}, pages = {15}, pmid = {36823453}, issn = {2730-6151}, support = {1657757//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1851222//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1851222//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1850719//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1850719//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 1850719//NSF | GEO | Division of Ocean Sciences (OCE)/ ; 2125191//NSF | Directorate for Biological Sciences (BIO)/ ; }, abstract = {The colony-forming cyanobacteria Trichodesmium spp. are considered one of the most important nitrogen-fixing genera in the warm, low nutrient ocean. Despite this central biogeochemical role, many questions about their evolution, physiology, and trophic interactions remain unanswered. To address these questions, we describe Trichodesmium pangenomic potential via significantly improved genomic assemblies from two isolates and 15 new >50% complete Trichodesmium metagenome-assembled genomes from hand-picked, Trichodesmium colonies spanning the Atlantic Ocean. Phylogenomics identified ~four N2 fixing clades of Trichodesmium across the transect, with T. thiebautii dominating the colony-specific reads. Pangenomic analyses showed that all T. 
thiebautii MAGs are enriched in COG defense mechanisms and encode a vertically inherited Type III-B Clustered Regularly Interspaced Short Palindromic Repeats and associated protein-based immunity system (CRISPR-Cas). Surprisingly, this CRISPR-Cas system was absent in all T. erythraeum genomes, vertically inherited by T. thiebautii, and correlated with increased signatures of horizontal gene transfer. Additionally, the system was expressed in metaproteomic and transcriptomic datasets and CRISPR spacer sequences with 100% identical hits to field-assembled, putative phage genome fragments were identified. While the currently CO2-limited T. erythraeum is expected to be a 'winner' of anthropogenic climate change, their genomic dearth of known phage resistance mechanisms, compared to T. thiebautii, could put this outcome in question. Thus, the clear demarcation of T. thiebautii maintaining CRISPR-Cas systems, while T. erythraeum does not, identifies Trichodesmium as an ecologically important CRISPR-Cas model system, and highlights the need for more research on phage-Trichodesmium interactions.}, } @article {pmid36819029, year = {2023}, author = {Liu, B and Ren, YS and Su, CY and Abe, Y and Zhu, DH}, title = {Pangenomic analysis of Wolbachia provides insight into the evolution of host adaptation and cytoplasmic incompatibility factor genes.}, journal = {Frontiers in microbiology}, volume = {14}, number = {}, pages = {1084839}, pmid = {36819029}, issn = {1664-302X}, abstract = {INTRODUCTION: The genus Wolbachia provides a typical example of intracellular bacteria that infect the germline of arthropods and filarial nematodes worldwide. Their importance as biological regulators of invertebrates, so it is particularly important to study the evolution, divergence and host adaptation of these bacteria at the genome-wide level.

METHODS: Here, we used publicly available Wolbachia genomes to reconstruct their evolutionary history and explore their adaptation under host selection.

RESULTS: Our findings indicate that segmental and single-gene duplications, such as DNA methylase, bZIP transcription factor, heat shock protein 90, in single monophyletic Wolbachia lineages (including supergroups A and B) may be responsible for improving the ability to adapt to a broad host range in arthropod-infecting strains. In contrast to A strains, high genetic diversity and rapidly evolving gene families occur in B strains, which may promote the ability of supergroup B strains to adapt to new hosts and their large-scale spreading. In addition, we hypothesize that there might have been two independent horizontal transfer events of cif genes in two sublineages of supergroup A strains. Interestingly, during the independent evolution of supergroup A and B strains, the rapid evolution of cif genes in supergroup B strains resulted in the loss of their functional domain, reflected in a possible decrease in the proportion of induced cytoplasmic incompatibility (CI) strains.

DISCUSSION: This present study highlights for reconstructing of evolutionary history, addressing host adaptation-related evolution and exploring the origin and divergence of CI genes in each Wolbachia supergroup. Our results thus not only provide a basis for further exploring the evolutionary history of Wolbachia adaptation under host selection but also reveal a new research direction for studying the molecular regulation of Wolbachia-induced cytoplasmic incompatibility.}, } @article {pmid36817109, year = {2022}, author = {Dereeper, A and Allouch, N and Guerlais, V and Garnier, M and Ma, L and De Jonckheere, JF and Joseph, SJ and Ali, IKM and Talarmin, A and Marcelino, I}, title = {Naegleria genus pangenome reveals new structural and functional insights into the versatility of these free-living amoebae.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1056418}, pmid = {36817109}, issn = {1664-302X}, abstract = {INTRODUCTION: Free-living amoebae of the Naegleria genus belong to the major protist clade Heterolobosea and are ubiquitously distributed in soil and freshwater habitats. Of the 47 Naegleria species described, N. fowleri is the only one being pathogenic to humans, causing a rare but fulminant primary amoebic meningoencephalitis. Some Naegleria genome sequences are publicly available, but the genetic basis for Naegleria diversity and ability to thrive in diverse environments (including human brain) remains unclear.

METHODS: Herein, we constructed a high-quality Naegleria genus pangenome to obtain a comprehensive catalog of genes encoded by these amoebae. For this, we first sequenced, assembled, and annotated six new Naegleria genomes.

RESULTS AND DISCUSSION: Genome architecture analyses revealed that Naegleria may use genome plasticity features such as ploidy/aneuploidy to modulate their behavior in different environments. When comparing 14 near-to-complete genome sequences, our results estimated the theoretical Naegleria pangenome as a closed genome, with 13,943 genes, including 3,563 core and 10,380 accessory genes. The functional annotations revealed that a large fraction of Naegleria genes show significant sequence similarity with those already described in other kingdoms, namely Animalia and Plantae. Comparative analyses highlighted a remarkable genomic heterogeneity, even for closely related strains and demonstrate that Naegleria harbors extensive genome variability, reflected in different metabolic repertoires. If Naegleria core genome was enriched in conserved genes essential for metabolic, regulatory and survival processes, the accessory genome revealed the presence of genes involved in stress response, macromolecule modifications, cell signaling and immune response. Commonly reported N. fowleri virulence-associated genes were present in both core and accessory genomes, suggesting that N. fowleri's ability to infect human brain could be related to its unique species-specific genes (mostly of unknown function) and/or to differential gene expression. 
The construction of Naegleria first pangenome allowed us to move away from a single reference genome (that does not necessarily represent each species as a whole) and to identify essential and dispensable genes in Naegleria evolution, diversity and biology, paving the way for further genomic and post-genomic studies.}, } @article {pmid36815495, year = {2023}, author = {Favaro, L and Campanaro, S and Fugaban, JII and Treu, L and Jung, ES and d'Ovidio, L and de Oliveira, DP and Liong, MT and Ivanova, IV and Todorov, SD}, title = {Genomic, metabolomic, and functional characterisation of beneficial properties of Pediococcus pentosaceus ST58, isolated from human oral cavity.}, journal = {Beneficial microbes}, volume = {14}, number = {1}, pages = {57-72}, doi = {10.3920/BM2022.0067}, pmid = {36815495}, issn = {1876-2891}, mesh = {Humans ; Pediococcus pentosaceus/genetics/metabolism ; Random Amplified Polymorphic DNA Technique ; RNA, Ribosomal, 16S/genetics ; Pediococcus/genetics/metabolism ; *Probiotics ; *Bacteriocins/genetics/pharmacology ; Anti-Bacterial Agents/pharmacology ; *Listeria monocytogenes ; Genomics ; }, abstract = {Bacteriocins produced by lactic acid bacteria are proteinaceous antibacterial metabolites that normally exhibit bactericidal or bacteriostatic activity against genetically closely related bacteria. In this work, the bacteriocinogenic potential of Pediococcus pentosaceus strain ST58, isolated from oral cavity of a healthy volunteer was evaluated. To better understand the biological role of this strain, its technological and safety traits were deeply investigated through a combined approach considering physiological, metabolomic and genomic properties. Three out of 14 colonies generating inhibition zones were confirmed to be bacteriocin producers and, according to repPCR and RAPD-PCR, differentiation assays, and 16S rRNA sequencing it was confirmed to be replicates of the same strain, identified as P. pentosaceus, named ST58. 
Based on multiple isolation of the same strain (P. pentosaceus ST58) over the 26 weeks in screening process for the potential bacteriocinogenic strains from the oral cavity of the same volunteer, strain ST58 can be considered a persistent component of oral cavity microbiota. Genomic analysis of P. pentosaceus ST58 revealed the presence of operons encoding for bacteriocins pediocin PA-1 and penocin A. The produced bacteriocin(s) inhibited the growth of Listeria monocytogenes, Enterococcus spp. and some Lactobacillus spp. used to determine the activity spectrum. The highest levels of production (6400 AU/ml) were recorded against L. monocytogenes strains after 24 h of incubation and the antimicrobial activity was inhibited after treatment of the cell-free supernatants with proteolytic enzymes. Noteworthy, P. pentosaceus ST58 also presented antifungal activity and key metabolites potentially involved in these properties were identified. Overall, this strain can be of great biotechnological interest towards the development of effective bio-preservation cultures as well as potential health promoting microbes.}, } @article {pmid36814455, year = {2023}, author = {Tranchant-Dubreuil, C and Chenal, C and Blaison, M and Albar, L and Klein, V and Mariac, C and Wing, RA and Vigouroux, Y and Sabot, F}, title = {FrangiPANe, a tool for creating a panreference using left behind reads.}, journal = {NAR genomics and bioinformatics}, volume = {5}, number = {1}, pages = {lqad013}, pmid = {36814455}, issn = {2631-9268}, abstract = {We present here FrangiPANe, a pipeline developed to build panreference using short reads through a map-then-assemble strategy. Applying it to 248 African rice genomes using an improved CG14 reference genome, we identified an average of 8 Mb of new sequences and 5290 new contigs per individual. In total, 1.4 G of new sequences, consisting of 1 306 676 contigs, were assembled. 
We validated 97.7% of the contigs of the TOG5681 cultivar individual assembly from short reads on a newly long reads genome assembly of the same TOG5681 cultivar. FrangiPANe also allowed the anchoring of 31.5% of the new contigs within the CG14 reference genome, with a 92.5% accuracy at 2 kb span. We annotated in addition 3252 new genes absent from the reference. FrangiPANe was developed as a modular and interactive application to simplify the construction of a panreference using the map-then-assemble approach. It is available as a Docker image containing (i) a Jupyter notebook centralizing codes, documentation and interactive visualization of results, (ii) python scripts and (iii) all the software and libraries requested for each step of the analysis. We foreseen our approach will help leverage large-scale illumina dataset for pangenome studies in GWAS or detection of selection.}, } @article {pmid36807539, year = {2022}, author = {Wang, ZF and Rouard, M and Droc, G and Heslop-Harrison, PJS and Ge, XJ}, title = {Genome assembly of Musa beccarii shows extensive chromosomal rearrangements and genome expansion during evolution of Musaceae genomes.}, journal = {GigaScience}, volume = {12}, number = {}, pages = {}, pmid = {36807539}, issn = {2047-217X}, mesh = {*Musa/genetics ; *Musaceae/genetics ; Genome, Plant ; Chromosomes ; DNA, Ribosomal ; Phylogeny ; }, abstract = {BACKGROUND: Musa beccarii (Musaceae) is a banana species native to Borneo, sometimes grown as an ornamental plant. The basic chromosome number of Musa species is x = 7, 10, or 11; however, M. beccarii has a basic chromosome number of x = 9 (2n = 2x = 18), which is the same basic chromosome number of species in the sister genera Ensete and Musella. Musa beccarii is in the section Callimusa, which is sister to the section Musa. We generated a high-quality chromosome-scale genome assembly of M. beccarii to better understand the evolution and diversity of genomes within the family Musaceae.

FINDINGS: The M. beccarii genome was assembled by long-read and Hi-C sequencing, and genes were annotated using both long Iso-seq and short RNA-seq reads. The size of M. beccarii was the largest among all known Musaceae assemblies (∼570 Mbp) due to the expansion of transposable elements and increased 45S ribosomal DNA sites. By synteny analysis, we detected extensive genome-wide chromosome fusions and fissions between M. beccarii and the other Musa and Ensete species, far beyond those expected from differences in chromosome number. Within Musaceae, M. beccarii showed a reduced number of terpenoid synthase genes, which are related to chemical defense, and enrichment in lipid metabolism genes linked to the physical defense of the cell wall. Furthermore, type III polyketide synthase was the most abundant biosynthetic gene cluster (BGC) in M. beccarii. BGCs were not conserved in Musaceae genomes.

CONCLUSIONS: The genome assembly of M. beccarii is the first chromosome-scale genome assembly in the Callimusa section in Musa, which provides an important genetic resource that aids our understanding of the evolution of Musaceae genomes and enhances our knowledge of the pangenome.}, } @article {pmid36797493, year = {2023}, author = {Rautiainen, M and Nurk, S and Walenz, BP and Logsdon, GA and Porubsky, D and Rhie, A and Eichler, EE and Phillippy, AM and Koren, S}, title = {Telomere-to-telomere assembly of diploid chromosomes with Verkko.}, journal = {Nature biotechnology}, volume = {}, number = {}, pages = {}, pmid = {36797493}, issn = {1546-1696}, support = {F32 GM134558/GM/NIGMS NIH HHS/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; Z99 HG999999/ImNIH/Intramural NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; }, abstract = {The Telomere-to-Telomere consortium recently assembled the first truly complete sequence of a human genome. To resolve the most complex repeats, this project relied on manual integration of ultra-long Oxford Nanopore sequencing reads with a high-resolution assembly graph built from long, accurate PacBio high-fidelity reads. We have improved and automated this strategy in Verkko, an iterative, graph-based pipeline for assembling complete, diploid genomes. Verkko begins with a multiplex de Bruijn graph built from long, accurate reads and progressively simplifies this graph by integrating ultra-long reads and haplotype-specific markers. The result is a phased, diploid assembly of both haplotypes, with many chromosomes automatically assembled from telomere to telomere. Running Verkko on the HG002 human genome resulted in 20 of 46 diploid chromosomes assembled without gaps at 99.9997% accuracy. 
The complete assembly of diploid genomes is a critical step towards the construction of comprehensive pangenome databases and chromosome-scale comparative genomics.}, } @article {pmid36795789, year = {2023}, author = {Mohamed, F and Ruiz Rodriguez, LG and Zorzoli, A and Dorfmueller, HC and Raya, RR and Mozzi, F}, title = {Genomic diversity in Fructobacillus spp. isolated from fructose-rich niches.}, journal = {PloS one}, volume = {18}, number = {2}, pages = {e0281839}, pmid = {36795789}, issn = {1932-6203}, support = {109357/Z/15/Z/WT_/Wellcome Trust/United Kingdom ; 105606/Z/14/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Fructose/metabolism ; Phylogeny ; *Leuconostocaceae/genetics/metabolism ; *Lactobacillales/genetics ; Genomics ; }, abstract = {The Fructobacillus genus is a group of obligately fructophilic lactic acid bacteria (FLAB) that requires the use of fructose or another electron acceptor for their growth. In this work, we performed a comparative genomic analysis within the genus Fructobacillus by using 24 available genomes to evaluate genomic and metabolic differences among these organisms. In the genome of these strains, which varies between 1.15- and 1.75-Mbp, nineteen intact prophage regions, and seven complete CRISPR-Cas type II systems were found. Phylogenetic analyses located the studied genomes in two different clades. A pangenome analysis and a functional classification of their genes revealed that genomes of the first clade presented fewer genes involved in the synthesis of amino acids and other nitrogen compounds. 
Moreover, the presence of genes strictly related to the use of fructose and electron acceptors was variable within the genus, although these variations were not always related to the phylogeny.}, } @article {pmid36794816, year = {2023}, author = {Derrien, M and Mikulic, N and Uyoga, MA and Chenoll, E and Climent, E and Howard-Varona, A and Nyilima, S and Stoffel, NU and Karanja, S and Kottler, R and Stahl, B and Zimmermann, MB and Bourdet-Sicard, R}, title = {Gut microbiome function and composition in infants from rural Kenya and association with human milk oligosaccharides.}, journal = {Gut microbes}, volume = {15}, number = {1}, pages = {2178793}, pmid = {36794816}, issn = {1949-0984}, mesh = {Humans ; Infant ; *Milk, Human/chemistry ; *Gastrointestinal Microbiome/genetics ; Kenya/epidemiology ; Oligosaccharides ; Bifidobacterium/genetics ; }, abstract = {The gut microbiota evolves rapidly after birth, responding dynamically to environmental factors and playing a key role in short- and long-term health. Lifestyle and rurality have been shown to contribute to differences in the gut microbiome, including Bifidobacterium levels, between infants. We studied the composition, function and variability of the gut microbiomes of 6- to 11-month-old Kenyan infants (n = 105). Shotgun metagenomics showed Bifidobacterium longum to be the dominant species. A pangenomic analysis of B. longum in gut metagenomes revealed a high prevalence of B. longum subsp. infantis (B. infantis) in Kenyan infants (80%), and possible co-existence of this subspecies with B. longum subsp. longum. Stratification of the gut microbiome into community (GMC) types revealed differences in composition and functional features. GMC types with a higher prevalence of B. infantis and abundance of B. breve also had a lower pH and a lower abundance of genes encoding pathogenic features. 
An analysis of human milk oligosaccharides (HMOs) classified the human milk (HM) samples into four groups defined on the basis of secretor and Lewis polymorphisms revealed a higher prevalence of HM group III (Se+, Le-) (22%) than in most previously studied populations, with an enrichment in 2'-fucosyllactose. Our results show that the gut microbiome of partially breastfed Kenyan infants over the age of six months is enriched in bacteria from the Bifidobacterium community, including B. infantis, and that the high prevalence of a specific HM group may indicate a specific HMO-gut microbiome association. This study sheds light on gut microbiome variation in an understudied population with limited exposure to modern microbiome-altering factors.}, } @article {pmid36792708, year = {2023}, author = {Fudge, JB}, title = {Capturing haplotype variation in populations using pangenome references.}, journal = {Nature biotechnology}, volume = {41}, number = {2}, pages = {194}, doi = {10.1038/s41587-023-01691-1}, pmid = {36792708}, issn = {1546-1696}, mesh = {Haplotypes/genetics ; *Genomics ; *Genetic Variation/genetics ; }, } @article {pmid36792019, year = {2023}, author = {Lekired, A and Cherif-Silini, H and Silini, A and Ben Yahia, H and Ouzari, HI}, title = {Comparative genomics reveals the acquisition of mobile genetic elements by the plant growth-promoting Pantoea eucrina OB49 in polluted environments.}, journal = {Genomics}, volume = {115}, number = {2}, pages = {110579}, doi = {10.1016/j.ygeno.2023.110579}, pmid = {36792019}, issn = {1089-8646}, mesh = {*Metals, Heavy ; *Pantoea/genetics ; Biodegradation, Environmental ; Interspersed Repetitive Sequences ; Genomics ; }, abstract = {Heavy metal-tolerant plant growth-promoting bacteria (PGPB) have gained popularity in bioremediation in recent years. A genome-assisted study of a heavy metal-tolerant PGPB Pantoea eucrina OB49 isolated from the rhizosphere of wheat grown on a heavy metal-contaminated site is presented. 
Comparative pan-genome analysis indicated that OB49 acquired heavy metal resistance genes through horizontal gene transfer. On contigs S10 and S12, OB49 has two arsRBCH operons that give arsenic resistance. On the S12 contig, an arsRBCH operon was discovered in conjunction with the merRTPCADE operon, which provides mercury resistance. P. eucrina OB49 may be involved in an ecological alternative for heavy metal remediation and growth promotion of wheat grown in metal-polluted soils. Our results suggested the detection of mobile genetic elements that harbour the ars operon and the fluoride resistance genes adjacent to the mer operon.}, } @article {pmid36781662, year = {2023}, author = {Thomas, WJW and Zhang, Y and Amas, JC and Cantila, AY and Zandberg, JD and Harvie, SL and Batley, J}, title = {Innovative Advances in Plant Genotyping.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2638}, number = {}, pages = {451-465}, pmid = {36781662}, issn = {1940-6029}, mesh = {Genotype ; *Genotyping Techniques ; *Genome, Plant ; Sequence Analysis, DNA ; Crops, Agricultural/genetics ; Polymorphism, Single Nucleotide ; }, abstract = {Over the past decade, advances in plant genotyping have been critical in enabling the identification of genetic diversity, in understanding evolution, and in dissecting important traits in both crops and native plants. The widespread popularity of single-nucleotide polymorphisms (SNPs) has prompted significant improvements to SNP-based genotyping, including SNP arrays, genotyping by sequencing, and whole-genome resequencing. More recent approaches, including genotyping structural variants, utilizing pangenomes to capture species-wide genetic diversity and exploiting machine learning to analyze genotypic data sets, are pushing the boundaries of what plant genotyping can offer. 
In this chapter, we highlight these innovations and discuss how they will accelerate and advance future genotyping efforts.}, } @article {pmid36778393, year = {2023}, author = {Bonnie, JK and Ahmed, O and Langmead, B}, title = {DandD: efficient measurement of sequence growth and similarity.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {36778393}, abstract = {Genome assembly databases are growing rapidly. The sequence content in each new assembly can be largely redundant with previous ones, but this is neither conceptually nor algorithmically easy to measure. We propose new methods and a new tool called DandD that addresses the question of how much new sequence is gained when a sequence collection grows. DandD can describe how much human structural variation is being discovered in each new human genome assembly and when discoveries will level off in the future. DandD uses a measure called δ ("delta"), developed initially for data compression. Computing δ directly requires counting k-mers, but DandD can rapidly estimate it using genomic sketches. We also propose δ as an alternative to k-mer-specific cardinalities when computing the Jaccard coefficient, avoiding the pitfalls of a poor choice of k. We demonstrate the utility of DandD's functions for estimating δ, characterizing the rate of pangenome growth, and computing allpairs similarities using k-independent Jaccard. DandD is open source software available at: https://github.com/jessicabonnie/dandd .}, } @article {pmid36777875, year = {2022}, author = {Grimplet, J}, title = {Genomic and Bioinformatic Resources for Perennial Fruit Species.}, journal = {Current genomics}, volume = {23}, number = {4}, pages = {217-233}, pmid = {36777875}, issn = {1389-2029}, abstract = {In the post-genomic era, data management and development of bioinformatic tools are critical for the adequate exploitation of genomics data. 
In this review, we address the actual situation for the subset of crops represented by the perennial fruit species. The agronomical singularity of these species compared to plant and crop model species provides significant challenges on the implementation of good practices generally not addressed in other species. Studies are usually performed over several years in non-controlled environments, usage of rootstock is common, and breeders heavily rely on vegetative propagation. A reference genome is now available for all the major species as well as many members of the economically important genera for breeding purposes. Development of pangenome for these species is beginning to gain momentum which will require a substantial effort in term of bioinformatic tool development. The available tools for genome annotation and functional analysis will also be presented.}, } @article {pmid36764870, year = {2023}, author = {Dwivedi, SL and Heslop-Harrison, P and Spillane, C and McKeown, PC and Edwards, D and Goldman, I and Ortiz, R}, title = {Evolutionary dynamics and adaptive benefits of deleterious mutations in crop gene pools.}, journal = {Trends in plant science}, volume = {28}, number = {6}, pages = {685-697}, doi = {10.1016/j.tplants.2023.01.006}, pmid = {36764870}, issn = {1878-4372}, mesh = {*Gene Pool ; Mutation/genetics ; *Biological Evolution ; Genomics ; Phenotype ; Genome, Plant/genetics ; Plant Breeding ; }, abstract = {Mutations with deleterious consequences in nature may be conditionally deleterious in crop plants. That is, while some genetic variants may reduce fitness under wild conditions and be subject to purifying selection, they can be under positive selection in domesticates. Such deleterious alleles can be plant breeding targets, particularly for complex traits. The difficulty of distinguishing favorable from unfavorable variants reduces the power of selection, while favorable trait variation and heterosis may be attributable to deleterious alleles. 
Here, we review the roles of deleterious mutations in crop breeding and discuss how they can be used as a new avenue for crop improvement with emerging genomic tools, including HapMaps and pangenome analysis, aiding the identification, removal, or exploitation of deleterious mutations.}, } @article {pmid36760124, year = {2023}, author = {Jin, S and Han, Z and Hu, Y and Si, Z and Dai, F and He, L and Cheng, Y and Li, Y and Zhao, T and Fang, L and Zhang, T}, title = {Structural variation (SV)-based pan-genome and GWAS reveal the impacts of SVs on the speciation and diversification of allotetraploid cottons.}, journal = {Molecular plant}, volume = {16}, number = {4}, pages = {678-693}, doi = {10.1016/j.molp.2023.02.004}, pmid = {36760124}, issn = {1752-9867}, mesh = {*Gossypium/genetics ; *Genome-Wide Association Study ; Genome, Plant/genetics ; Phenotype ; Tetraploidy ; }, abstract = {Structural variations (SVs) have long been described as being involved in the origin, adaption, and domestication of species. However, the underlying genetic and genomic mechanisms are poorly understood. Here, we report a high-quality genome assembly of Gossypium barbadense acc. Tanguis, a landrace that is closely related to formation of extra-long-staple (ELS) cultivated cotton. An SV-based pan-genome (Pan-SV) was then constructed using a total of 182 593 non-redundant SVs, including 2236 inversions, 97 398 insertions, and 82 959 deletions from 11 assembled genomes of allopolyploid cotton. The utility of this Pan-SV was then demonstrated through population structure analysis and genome-wide association studies (GWASs). Using segregation mapping populations produced through crossing ELS cotton and the landrace along with an SV-based GWAS, certain SVs responsible for speciation, domestication, and improvement in tetraploid cottons were identified. 
Importantly, some of the SVs presently identified as associated with the yield and fiber quality improvement had not been identified in previous SNP-based GWAS. In particular, a 9-bp insertion or deletion was found to associate with elimination of the interspecific reproductive isolation between Gossypium hirsutum and G. barbadense. Collectively, this study provides new insights into genome-wide, gene-scale SVs linked to important agronomic traits in a major crop species and highlights the importance of SVs during the speciation, domestication, and improvement of cultivated crop species.}, } @article {pmid36753700, year = {2023}, author = {Tanwar, AS and Shruptha, P and Paul, B and Murali, TS and Brand, A and Satyamoorthy, K}, title = {How Can Omics Inform Diabetic Foot Ulcer Clinical Management? A Whole Genome Comparison of Four Clinical Strains of Staphylococcus aureus.}, journal = {Omics : a journal of integrative biology}, volume = {27}, number = {2}, pages = {51-61}, doi = {10.1089/omi.2022.0184}, pmid = {36753700}, issn = {1557-8100}, mesh = {Humans ; Staphylococcus aureus/genetics ; *Diabetic Foot/drug therapy ; Anti-Bacterial Agents/therapeutic use ; Virulence Factors/genetics ; *Staphylococcal Infections ; *Diabetes Mellitus ; }, abstract = {Foot ulcers and associated infections significantly contribute to morbidity and mortality in diabetes. While diverse pathogens are found in the diabetes-related infected ulcers, Staphylococcus aureus remains one of the most virulent and widely prevalent pathogens. The high prevalence of S. aureus in chronic wound infections, especially in clinical settings, is attributed to its ability to evolve and acquire resistance against common antibiotics and to elicit an array of virulence factors. In this study, whole genome comparison of four strains of S. 
aureus (MUF168, MUF256, MUM270, and MUM475) isolated from diabetic foot ulcer (DFU) infections showing varying resistance patterns was carried out to study the genomic similarity, antibiotic resistance profiling, associated virulence factors, and sequence variations in drug targets. The comparative genome analysis showed strains MUM475 and MUM270 to be highly resistant, MUF256 with moderate levels of resistance, and MUF168 to be the least resistant. Strain MUF256 and MUM475 harbored more virulence factors compared with other two strains. Deleterious sequence variants were observed suggesting potential role in altering drug targets and drug efficacy. This comparative whole genome study offers new molecular insights that may potentially inform evidence-based diagnosis and treatment of DFUs in the clinic.}, } @article {pmid36753463, year = {2023}, author = {Hulin, MT and Hill, L and Jones, JDG and Ma, W}, title = {Pangenomic analysis reveals plant NAD[+] manipulation as an important virulence activity of bacterial pathogen effectors.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {120}, number = {7}, pages = {e2217114120}, pmid = {36753463}, issn = {1091-6490}, support = {BBS/E/J/000PR9797/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Virulence ; *NAD/metabolism ; *Cyclic ADP-Ribose/metabolism ; Bacteria/metabolism ; Plants/metabolism ; Pseudomonas syringae/metabolism ; NAD+ Nucleosidase/genetics/metabolism ; Bacterial Proteins/genetics/metabolism ; Plant Diseases/microbiology ; }, abstract = {Nicotinamide adenine dinucleotide (NAD[+]) has emerged as a key component in prokaryotic and eukaryotic immune systems. The recent discovery that Toll/interleukin-1 receptor (TIR) proteins function as NAD[+] hydrolases (NADase) links NAD[+]-derived small molecules with immune signaling. We investigated pathogen manipulation of host NAD[+] metabolism as a virulence strategy. 
Using the pangenome of the model bacterial pathogen Pseudomonas syringae, we conducted a structure-based similarity search from 35,000 orthogroups for type III effectors (T3Es) with potential NADase activity. Thirteen T3Es, including five newly identified candidates, were identified that possess domain(s) characteristic of seven NAD[+]-hydrolyzing enzyme families. Most Pseudomonas syringae strains that depend on the type III secretion system to cause disease, encode at least one NAD[+]-manipulating T3E, and many have several. We experimentally confirmed the type III-dependent secretion of a novel T3E, named HopBY, which shows structural similarity to both TIR and adenosine diphosphate ribose (ADPR) cyclase. Homologs of HopBY were predicted to be type VI effectors in diverse bacterial species, indicating potential recruitment of this activity by microbial proteins secreted during various interspecies interactions. HopBY efficiently hydrolyzes NAD[+] and specifically produces 2'cADPR, which can also be produced by TIR immune receptors of plants and by other bacteria. Intriguingly, this effector promoted bacterial virulence, indicating that 2'cADPR may not be the signaling molecule that directly initiates immunity. This study highlights a host-pathogen battleground centered around NAD[+] metabolism and provides insight into the NAD[+]-derived molecules involved in plant immunity.}, } @article {pmid36749783, year = {2023}, author = {Jirakkakul, J and Khoiri, AN and Duangfoo, T and Dulsawat, S and Sutheeworapong, S and Petsong, K and Wattanachaisaereekul, S and Paenkaew, P and Tachaleat, A and Cheevadhanarak, S and Prommeenate, P}, title = {Insights into the genome of Methylobacterium sp. 
NMS14P, a novel bacterium for growth promotion of maize, chili, and sugarcane.}, journal = {PloS one}, volume = {18}, number = {2}, pages = {e0281505}, pmid = {36749783}, issn = {1932-6203}, mesh = {Zea mays/genetics ; *Saccharum/genetics ; *Methylobacterium/genetics ; RNA, Ribosomal, 16S/genetics ; Edible Grain/genetics ; Phylogeny ; }, abstract = {A novel methylotrophic bacterium designated as NMS14P was isolated from the root of an organic coffee plant (Coffea arabica) in Thailand. The 16S rRNA sequence analysis revealed that this new isolate belongs to the genus Methylobacterium, and its novelty was clarified by genomic and comparative genomic analyses, in which NMS14P exhibited low levels of relatedness with other Methylobacterium-type strains. NMS14P genome consists of a 6,268,579 bp chromosome, accompanied by a 542,519 bp megaplasmid and a 66,590 bp plasmid, namely pNMS14P1 and pNMS14P2, respectively. Several genes conferring plant growth promotion are aggregated on both chromosome and plasmids, including phosphate solubilization, indole-3-acetic acid (IAA) biosynthesis, cytokinins (CKs) production, 1-aminocyclopropane-1-carboxylate (ACC) deaminase activity, sulfur-oxidizing activity, trehalose synthesis, and urea metabolism. Furthermore, pangenome analysis showed that NMS14P possessed the highest number of strain-specific genes accounting for 1408 genes, particularly those that are essential for colonization and survival in a wide array of host environments, such as ABC transporter, chemotaxis, quorum sensing, biofilm formation, and biosynthesis of secondary metabolites. In vivo tests have supported that NMS14P significantly promoted the growth and development of maize, chili, and sugarcane. 
Collectively, NMS14P is proposed as a novel plant growth-promoting Methylobacterium that could potentially be applied to a broad range of host plants as Methylobacterium-based biofertilizers to reduce and ultimately substitute the use of synthetic agrochemicals for sustainable agriculture.}, } @article {pmid36748949, year = {2023}, author = {Reddy, TS and Zomer, R and Mantri, N}, title = {Nanoformulations as a strategy to overcome the delivery limitations of cannabinoids.}, journal = {Phytotherapy research : PTR}, volume = {37}, number = {4}, pages = {1526-1538}, doi = {10.1002/ptr.7742}, pmid = {36748949}, issn = {1099-1573}, support = {//MGC Pharmaceuticals Limited/ ; }, mesh = {Humans ; *Cannabinoids ; *Cannabidiol/therapeutic use ; Dronabinol/pharmacokinetics ; Pain/drug therapy ; Lipids ; }, abstract = {Medical cannabis has received significant interest in recent years due to its promising benefits in the management of pain, anxiety, depression and neurological and movement disorders. Specifically, the major phytocannabinoids derived from the cannabis plant such as (-) trans-Δ[9]-tetrahydrocannabinol (THC) and cannabidiol (CBD), have been shown to be responsible for the pharmacological and therapeutic properties. Recently, these phytocannabinoids have also attracted special attention in cancer treatment due to their well-known palliative benefits in chemotherapy-induced nausea, vomiting, pain and loss of appetite along with their anticancer activities. Despite the enormous pharmacological benefits, the low aqueous solubility, high instability (susceptibility to extensive first pass metabolism) and poor systemic bioavailability restrict their utilization at clinical perspective. Therefore, drug delivery strategies based on nanotechnology are emerging to improve pharmacokinetic profile and bioavailability of cannabinoids as well as enhance their targeted delivery. 
Here, we critically review the nano-formulation systems engineered for overcoming the delivery limitations of native phytocannabinoids including polymeric and lipid-based nanoparticles (lipid nano capsules (LNCs), nanostructured lipid carriers (NLCs), nanoemulsions (NE) and self-emulsifying drug delivery systems (SEDDS)), ethosomes and cyclodextrins as well as their therapeutic applications.}, } @article {pmid36748707, year = {2022}, author = {Worden, PJ and Bogema, DR and Micallef, ML and Go, J and Deutscher, AT and Labbate, M and Green, TJ and King, WL and Liu, M and Seymour, JR and Jenkins, C}, title = {Phylogenomic diversity of Vibrio species and other Gammaproteobacteria isolated from Pacific oysters (Crassostrea gigas) during a summer mortality outbreak.}, journal = {Microbial genomics}, volume = {8}, number = {12}, pages = {}, pmid = {36748707}, issn = {2057-5858}, mesh = {Animals ; *Crassostrea ; Phylogeny ; *Gammaproteobacteria ; Australia/epidemiology ; *Vibrio ; Disease Outbreaks ; }, abstract = {The Pacific oyster (PO), Crassostrea gigas, is an important commercial marine species but periodically experiences large stock losses due to disease events known as summer mortality. Summer mortality has been linked to environmental perturbations and numerous viral and bacterial agents, indicating this disease is multifactorial in nature. In 2013 and 2014, several summer mortality events occurred within the Port Stephens estuary (NSW, Australia). Extensive culture and molecular-based investigations were undertaken and several potentially pathogenic Vibrio species were identified. To improve species identification and genomically characterise isolates obtained from this outbreak, whole-genome sequencing (WGS) and subsequent genomic analyses were performed on 48 bacterial isolates, as well as a further nine isolates from other summer mortality studies using the same batch of juveniles. 
Average nucleotide identity (ANI) identified most isolates to the species level and included members of the Photobacterium, Pseudoalteromonas, Shewanella and Vibrio genera, with Vibrio species making up more than two-thirds of all species identified. Construction of a phylogenomic tree, ANI analysis, and pan-genome analysis of the 57 isolates represents the most comprehensive culture-based phylogenomic survey of Vibrios during a PO summer mortality event in Australian waters and revealed large genomic diversity in many of the identified species. Our analysis revealed limited and inconsistent associations between isolate species and their geographical origins, or host health status. Together with ANI and pan-genome results, these inconsistencies suggest that to determine the role that microbes may have in Pacific oyster summer mortality events, isolate identification must be at the taxonomic level of strain. Our WGS data (specifically, the accessory genomes) differentiated bacterial strains, and coupled with associated metadata, highlight the possibility of predicting a strain's environmental niche and level of pathogenicity.}, } @article {pmid36748586, year = {2023}, author = {Rai, A and Suresh, G and Ria, B and L, V and Pk, S and Ipsita, S and Sasikala, C and Venkata Ramana, C}, title = {Phylogenomic analysis of the genus Alcanivorax: proposal for division of this genus into the emended genus Alcanivorax and two novel genera Alloalcanivorax gen. nov. and Isoalcanivorax gen. 
nov.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {73}, number = {1}, pages = {}, doi = {10.1099/ijsem.0.005672}, pmid = {36748586}, issn = {1466-5034}, mesh = {Sequence Analysis, DNA ; *Alcanivoraceae/genetics ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Fatty Acids/chemistry ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; Base Composition ; }, abstract = {The members of the genus Alcanivorax are key players in the removal of petroleum hydrocarbons from polluted marine environments. More than half of the species were described in the last decade using 16S rRNA gene phylogeny and genomic-based metrics. However, the 16S rRNA gene identity (<94 %) between some members of the genus Alcanivorax suggested their imprecise taxonomic status. In this study, we examined the taxonomic positions of Alcanivorax species using 16S rRNA phylogeny and further validated them using phylogenomic-related indexes such as digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI), average amino acid identity (AAI), percentage of conserved proteins (POCP) and comparative genomic studies. ANI and dDDH values confirmed that all the Alcanivorax species were well described at the species level. The phylotaxogenomic analysis showed that Alcanivorax species formed three clades. The inter-clade values of AAI and POCP were less than 70 %. The pan-genome evaluation depicted that the members shared 1223 core genes and its number increased drastically when analysed clade-wise. Therefore, these results necessitate the transfer of clade II and clade III members into Isoalcanivorax gen. nov. and Alloalcanivorax gen. 
nov., respectively, along with the emended description of the genus Alcanivorax sensu stricto.}, } @article {pmid36748580, year = {2022}, author = {Wietz, M and López-Pérez, M and Sher, D and Biller, SJ and Rodriguez-Valera, F}, title = {Microbe Profile: Alteromonas macleodii - a widespread, fast-responding, 'interactive' marine bacterium.}, journal = {Microbiology (Reading, England)}, volume = {168}, number = {11}, pages = {}, doi = {10.1099/mic.0.001236}, pmid = {36748580}, issn = {1465-2080}, mesh = {*Genome, Bacterial/genetics ; *Alteromonas/genetics/metabolism ; Phenotype ; Adaptation, Physiological ; Phylogeny ; Seawater/microbiology ; }, abstract = {Alteromonas macleodii is a marine heterotrophic bacterium with widespread distribution - from temperate to tropical oceans, and from surface to deep waters. Strains of A. macleodii exhibit considerable genomic and metabolic variability, and can grow rapidly on diverse organic compounds. A. macleodii is a model organism for the study of population genomics, physiological adaptations and microbial interactions, with individual genomes encoding diverse phenotypic traits influenced by recombination and horizontal gene transfer.}, } @article {pmid36748558, year = {2022}, author = {Cummins, EA and Hall, RJ and Connor, C and McInerney, JO and McNally, A}, title = {Distinct evolutionary trajectories in the Escherichia coli pangenome occur within sequence types.}, journal = {Microbial genomics}, volume = {8}, number = {11}, pages = {}, pmid = {36748558}, issn = {2057-5858}, mesh = {*Escherichia coli/genetics ; *Biological Evolution ; Genomics ; }, abstract = {The Escherichia coli species contains a diverse set of sequence types and there remain important questions regarding differences in genetic content within this population that need to be addressed. Pangenomes are useful vehicles for studying gene content within sequence types. Here, we analyse 21 E. 
coli sequence type pangenomes using comparative pangenomics to identify variance in both pangenome structure and content. We present functional breakdowns of sequence type core genomes and identify sequence types that are enriched in metabolism, transcription and cell membrane biogenesis genes. We also uncover metabolism genes that have variable core classification, depending on which allele is present. Our comparative pangenomics approach allows for detailed exploration of sequence type pangenomes within the context of the species. We show that ongoing gene gain and loss in the E. coli pangenome is sequence type-specific, which may be a consequence of distinct sequence type-specific evolutionary drivers.}, } @article {pmid36748494, year = {2022}, author = {Li, BB and Zhang, XJ and Wu, D and Zhang, DD and Fang, BZ and Liu, HC and Zhou, YG and Cai, M and Li, WJ and Nie, GX}, title = {Devosia ureilytica sp. nov., isolated from Kuche River in China.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {72}, number = {12}, pages = {}, doi = {10.1099/ijsem.0.005663}, pmid = {36748494}, issn = {1466-5034}, mesh = {*Fatty Acids/chemistry ; *Phospholipids/chemistry ; Phylogeny ; Rivers ; RNA, Ribosomal, 16S/genetics ; Ubiquinone/chemistry ; Sequence Analysis, DNA ; Base Composition ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; China ; }, abstract = {Two novel strains, designated XJ19-45[T] and XJ19-1, were isolated from water of Kuche River in Xinjiang Uygur Autonomous Region, China. Their cells were Gram-stain-negative, aerobic and motile rods. The phylogenetic analyses based on 16S rRNA genes and genomes showed that the two isolates belonged to the genus Devosia and the closest relative was Devosia subaequoris HST3-14[T]. 
The 16S rRNA genes sequences pairwise similarities, average nucleotide identities, digital DNA-DNA hybridizations and average amino acid identities between type strain XJ19-45[T] and other relatives were all less than 98.3, 80.3, 23.6 and 85.7 %, respectively, all below the species delineation thresholds. Pan-genomic analysis indicated that the novel isolate XJ19-45[T] shared 1594 core gene clusters with the 11 closely related type strains in Devosia, and the number of strain-specific clusters was 390. The major cellular fatty acids (>10 %) of the two isolates were summed feature 8, C18 : 1 ω7c 11-methyl and C16 : 0. Diphosphatidylglycerol, phosphatidylglycerol and glycolipids were the major polar lipids, and Q10 was the detected respiratory quinone. Based on the results of phenotypic, physiological, chemotaxonomic and genotypic characterizations, we propose that the isolates represent a novel species, for which the name Devosia ureilytica sp. nov. is proposed. The type strain is XJ19-45[T] (=CGMCC 1.19388[T]=KCTC 92263[T]).}, } @article {pmid36747706, year = {2023}, author = {Hoover, RL and Keffer, JL and Polson, SW and Chan, CS}, title = {Gallionellaceae pangenomic analysis reveals insight into phylogeny, metabolic flexibility, and iron oxidation mechanisms.}, journal = {bioRxiv : the preprint server for biology}, volume = {}, number = {}, pages = {}, pmid = {36747706}, abstract = {UNLABELLED: The iron-oxidizing Gallionellaceae drive a wide variety of biogeochemical cycles through their metabolisms and biominerals. To better understand the environmental impacts of Gallionellaceae, we need to improve our knowledge of their diversity and metabolisms, especially any novel iron oxidation mechanisms. Here, we used a pangenomic analysis of 103 genomes to resolve Gallionellaceae phylogeny and explore the range of genomic potential. 
Using a concatenated ribosomal protein tree and key gene patterns, we determined Gallionellaceae has four genera, divided into two groups—iron-oxidizing bacteria (FeOB) Gallionella, Sideroxydans, and Ferriphaselus with known iron oxidases (Cyc2, MtoA) and nitrite-oxidizing bacteria (NOB) Candidatus Nitrotoga with nitrite oxidase (Nxr). The FeOB and NOB have similar electron transport chains, including genes for reverse electron transport and carbon fixation. Auxiliary energy metabolisms including S oxidation, denitrification, and organotrophy were scattered throughout the Gallionellaceae FeOB. Within FeOB, we found genes that may represent adaptations for iron oxidation, including a variety of extracellular electron uptake (EEU) mechanisms. FeOB genomes encoded more predicted c-type cytochromes overall, notably more multiheme c-type cytochromes (MHCs) with >10 CXXCH motifs. These include homologs of several predicted outer membrane porin-MHC complexes, including MtoAB and Uet. MHCs are known to efficiently conduct electrons across longer distances and function across a wide range of redox potentials that overlap with mineral redox potentials, which can help expand the range of usable iron substrates. Overall, the results of pangenome analyses suggest that the Gallionellaceae genera Gallionella, Sideroxydans, and Ferriphaselus are primarily iron oxidizers, capable of oxidizing dissolved Fe[2+] as well as a range of solid iron or other mineral substrates.

IMPORTANCE: Neutrophilic iron-oxidizing bacteria (FeOB) produce copious iron (oxyhydr)oxides that can profoundly influence biogeochemical cycles, notably the fate of carbon and many metals. To fully understand environmental microbial iron oxidation, we need a thorough accounting of iron oxidation mechanisms. In this study we show the Gallionellaceae FeOB have both known iron oxidases as well as uncharacterized multiheme cytochromes (MHCs). MHCs are predicted to transfer electrons from extracellular substrates and likely confer metabolic capabilities that help Gallionellaceae occupy a range of different iron- and mineral-rich niches. Gallionellaceae appear to specialize in iron oxidation, so it makes sense that they would have multiple mechanisms to oxidize various forms of iron, given the many iron minerals on Earth, as well as the physiological and kinetic challenges faced by FeOB. The multiple iron/mineral oxidation mechanisms may help drive the widespread ecological success of Gallionellaceae.}, } @article {pmid36747219, year = {2023}, author = {Chen, H and King, R and Smith, D and Bayon, C and Ashfield, T and Torriani, S and Kanyuka, K and Hammond-Kosack, K and Bieri, S and Rudd, J}, title = {Combined pangenomics and transcriptomics reveals core and redundant virulence processes in a rapidly evolving fungal plant pathogen.}, journal = {BMC biology}, volume = {21}, number = {1}, pages = {24}, pmid = {36747219}, issn = {1741-7007}, support = {BB/J/00426X/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/E/C000I0250/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Transcriptome ; Virulence/genetics ; *Gene Expression Profiling ; Genome, Fungal ; Genes, Fungal ; Plant Diseases/microbiology ; }, abstract = {BACKGROUND: Studying genomic variation in rapidly evolving pathogens potentially enables identification of genes supporting their "core biology", being present, functional and expressed by all 
strains or "flexible biology", varying between strains. Genes supporting flexible biology may be considered to be "accessory", whilst the "core" gene set is likely to be important for common features of a pathogen species biology, including virulence on all host genotypes. The wheat-pathogenic fungus Zymoseptoria tritici represents one of the most rapidly evolving threats to global food security and was the focus of this study.

RESULTS: We constructed a pangenome of 18 European field isolates, with 12 also subjected to RNAseq transcription profiling during infection. Combining this data, we predicted a "core" gene set comprising 9807 sequences which were (1) present in all isolates, (2) lacking inactivating polymorphisms and (3) expressed by all isolates. A large accessory genome, consisting of 45% of the total genes, was also defined. We classified genetic and genomic polymorphism at both chromosomal and individual gene scales. Proteins required for essential functions including virulence had lower-than average sequence variability amongst core genes. Both core and accessory genomes encoded many small, secreted candidate effector proteins that likely interact with plant immunity. Viral vector-mediated transient in planta overexpression of 88 candidates failed to identify any which induced leaf necrosis characteristic of disease. However, functional complementation of a non-pathogenic deletion mutant lacking five core genes demonstrated that full virulence was restored by re-introduction of the single gene exhibiting least sequence polymorphism and highest expression.

CONCLUSIONS: These data support the combined use of pangenomics and transcriptomics for defining genes which represent core, and potentially exploitable, weaknesses in rapidly evolving pathogens.}, } @article {pmid36747211, year = {2023}, author = {Jia, Y and Xu, M and Hu, H and Chapman, B and Watt, C and Buerte, B and Han, N and Zhu, M and Bian, H and Li, C and Zeng, Z}, title = {Comparative gene retention analysis in barley, wild emmer, and bread wheat pangenome lines reveals factors affecting gene retention following gene duplication.}, journal = {BMC biology}, volume = {21}, number = {1}, pages = {25}, pmid = {36747211}, issn = {1741-7007}, support = {9176507//Grains Research and Development Corporation/ ; 113731971932//Innovative Research Group Project of the National Natural Science Foundation of China/ ; }, mesh = {*Gene Duplication ; Triticum/genetics ; *Hordeum/genetics ; Bread ; Multigene Family ; Evolution, Molecular ; Phylogeny ; }, abstract = {BACKGROUND: Gene duplication is a prevalent phenomenon and a major driving force underlying genome evolution. The process leading to the fixation of gene duplicates following duplication is critical to understand how genome evolves but remains fragmentally understood. Most previous studies on gene retention are based on gene duplicate analyses in single reference genome. No population-based comparative gene retention analysis has been performed to date.

RESULTS: Taking advantage of recently published genomic data in Triticeae, we dissected a divergent homogentisate phytyltransferase (HPT2) lineage caught in the middle stage of gene fixation following duplication. The presence/absence of HPT2 in barley (diploid), wild emmer (tetraploid), and bread wheat (hexaploid) pangenome lines appears to be associated with gene dosage constraint and environmental adaption. Based on these observations, we adopted a phylogeny-based orthology inference approach and performed comparative gene retention analyses across barley, wild emmer, and bread wheat. This led to the identification of 326 HPT2-pattern-like genes at whole genome scale, representing a pool of gene duplicates in the middle stage of gene fixation. Majority of these HPT2-pattern-like genes were identified as small-scale duplicates, such as dispersed, tandem, and proximal duplications. Natural selection analyses showed that HPT2-pattern-like genes have experienced relaxed selection pressure, which is generally accompanied with partial positive selection and transcriptional divergence. Functional enrichment analyses showed that HPT2-pattern-like genes are over-represented with molecular-binding and defense response functions, supporting the potential role of environmental adaption during gene retention. We also observed that gene duplicates from larger gene family are more likely to be lost, implying a gene dosage constraint effect. Further comparative gene retention analysis in barley and bread wheat pangenome lines revealed combined effects of species-specific selection and gene dosage constraint.

CONCLUSIONS: Comparative gene retention analyses at the population level support gene dosage constraint, environmental adaption, and species-specific selection as three factors that may affect gene retention following gene duplication. Our findings shed light on the evolutionary process leading to the retention of newly formed gene duplicates and will greatly improve our understanding on genome evolution via duplication.}, } @article {pmid36746216, year = {2023}, author = {Jeong, BR and Jang, J and Jin, E}, title = {Genome engineering via gene editing technologies in microalgae.}, journal = {Bioresource technology}, volume = {373}, number = {}, pages = {128701}, doi = {10.1016/j.biortech.2023.128701}, pmid = {36746216}, issn = {1873-2976}, mesh = {*Gene Editing ; *Microalgae/genetics/metabolism ; Genetic Engineering ; Biotechnology ; Metabolic Networks and Pathways ; Metabolic Engineering ; }, abstract = {CRISPR-Cas has revolutionized genetic modification with its comparative simplicity and accuracy, and it can be used even at the genomic level. Microalgae are excellent feedstocks for biofuels and nutraceuticals because they contain high levels of fatty acids, carotenoids, and other metabolites; however, genome engineering for microalgae is not yet as developed as for other model organisms. Microalgal engineering at the genetic and metabolic levels is relatively well established, and a few genomic resources are available. Their genomic information was used for a "safe harbor" site for stable transgene expression in microalgae. This review proposes further genome engineering schemes including the construction of sgRNA libraries, pan-genomic and epigenomic resources, and mini-genomes, which can together be developed into synthetic biology for carbon-based engineering in microalgae. 
Acetyl-CoA is at the center of carbon metabolic pathways and is further reviewed for the production of molecules including terpenoids in microalgae.}, } @article {pmid36741902, year = {2022}, author = {Srinivas, K and Ghatak, S and Pyngrope, DA and Angappan, M and Milton, AAP and Das, S and Lyngdoh, V and Lamare, JP and Prasad, MCB and Sen, A}, title = {Avian strains of emerging pathogen Escherichia fergusonii are phylogenetically diverse and harbor the greatest AMR dissemination potential among different sources: Comparative genomic evidence.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1080677}, pmid = {36741902}, issn = {1664-302X}, abstract = {INTRODUCTION: Escherichia fergusonii is regarded as an emerging pathogen with zoonotic potential. In the current study, we undertook source-wise comparative genomic analyses (resistome, virulome, mobilome and pangenome) to understand the antimicrobial resistance, virulence, mobile genetic elements and phylogenetic diversity of E. fergusonii.

METHODS: Six E. fergusonii strains (5 multidrug resistant strains and 1 biofilm former) were isolated from poultry (duck faeces and retail chicken samples). Following confirmation by phenotypic and molecular methods, the isolates were further characterized and their genomes were sequenced. Comparative resisto-virulo-mobilome analyses and pangenomics were performed for E. fergusonii genomes, while including 125 other E. fergusonii genomes available from NCBI database.

RESULTS AND DISCUSSION: Avian and porcine strains of E. fergusonii were found to carry significantly higher number of antimicrobial resistance genes (p < 0.05) and mobile genetic elements (plasmids, transposons and integrons) (p < 0.05), while the pathogenic potential of bovine strains was significantly higher compared to other strains (p < 0.05). Pan-genome development trends indicated open pan-genome for all strains (0 < γ < 1). Genomic diversity of avian strains was found to be greater than that from other sources. Phylogenetic analysis revealed close clustering among isolates of similar isolation source and geographical location. Indian isolates of E. fergusonii clustered closely with those from Chinese and a singleton Australian isolate. Overall, being the first pangenomic study on E. fergusonii, our analysis provided important cues on genomic features of the emerging pathogen E. fergusonii while highlighting the potential role of avian strains in dissemination of AMR.}, } @article {pmid36739346, year = {2023}, author = {Lanclos, VC and Rasmussen, AN and Kojima, CY and Cheng, C and Henson, MW and Faircloth, BC and Francis, CA and Thrash, JC}, title = {Ecophysiology and genomics of the brackish water adapted SAR11 subclade IIIa.}, journal = {The ISME journal}, volume = {17}, number = {4}, pages = {620-629}, pmid = {36739346}, issn = {1751-7370}, mesh = {Phylogeny ; *Saline Waters ; Oceans and Seas ; Genomics ; Biological Evolution ; *Alphaproteobacteria/genetics ; Seawater ; }, abstract = {The Order Pelagibacterales (SAR11) is the most abundant group of heterotrophic bacterioplankton in global oceans and comprises multiple subclades with unique spatiotemporal distributions. Subclade IIIa is the primary SAR11 group in brackish waters and shares a common ancestor with the dominant freshwater IIIb (LD12) subclade. Despite its dominance in brackish environments, subclade IIIa lacks systematic genomic or ecological studies. 
Here, we combine closed genomes from new IIIa isolates, new IIIa MAGS from San Francisco Bay (SFB), and 460 highly complete publicly available SAR11 genomes for the most comprehensive pangenomic study of subclade IIIa to date. Subclade IIIa represents a taxonomic family containing three genera (denoted as subgroups IIIa.1, IIIa.2, and IIIa.3) that had distinct ecological distributions related to salinity. The expansion of taxon selection within subclade IIIa also established previously noted metabolic differentiation in subclade IIIa compared to other SAR11 subclades such as glycine/serine prototrophy, mosaic glyoxylate shunt presence, and polyhydroxyalkanoate synthesis potential. Our analysis further shows metabolic flexibility among subgroups within IIIa. Additionally, we find that subclade IIIa.3 bridges the marine and freshwater clades based on its potential for compatible solute transport, iron utilization, and bicarbonate management potential. Pure culture experimentation validated differential salinity ranges in IIIa.1 and IIIa.3 and provided detailed IIIa cell size and volume data. 
This study is an important step forward for understanding the genomic, ecological, and physiological differentiation of subclade IIIa and the overall evolutionary history of SAR11.}, } @article {pmid36728698, year = {2023}, author = {Saikia, J and Kotoky, R and Debnath, R and Kumar, N and Gogoi, P and Yadav, A and Saikia, R}, title = {De novo genomic analysis of Enterobacter asburiae EBRJ12, a plant growth-promoting rhizobacteria isolated from the rhizosphere of Phaseolus vulgaris L.}, journal = {Journal of applied microbiology}, volume = {134}, number = {2}, pages = {}, doi = {10.1093/jambio/lxac090}, pmid = {36728698}, issn = {1365-2672}, support = {MLP-1016//CSIR/ ; }, mesh = {*Phaseolus ; Rhizosphere ; Siderophores/genetics/metabolism ; Plant Development ; Bacteria ; Plants/microbiology ; Plant Roots/microbiology ; Soil Microbiology ; }, abstract = {AIM: Environmental stresses such as water deficit induced stress are one of the major limiting factors in crop production. However, some plant growth-promoting rhizobacteria (PGPR) can promote plant growth in such adverse condition. Therefore, the objective was to isolate rhizospheric bacteria from Phaseolus vulgaris L. growing in a drought-affected soil and to analyze its plant growth promoting (PGP) efficacy to black gram (Vigna mungo L.) and Bhut jolokia (Capsicum chinense Jacq.). Whole-genome sequencing of the potential bacteria was targeted to analyze the genetic potential of the isolate as a plant growth-promoting agent.

METHODS AND RESULTS: The isolate Enterobacter asburiae EBRJ12 was selected based on its PGP efficacy, which significantly improved plant growth and development. The genomic analysis revealed the presence of one circular chromosome of size 4.8 Mb containing 16 genes for osmotic stress regulation including osmotically inducible protein osmY, outer membrane protein A precursor ompA, aquaporin Z, and an operon for osmoprotectant ABC transporter yehZYXW. Moreover, the genome has a complete genetic cluster for biosynthesis of siderophore Enterobactin and siderophore Aerobactin. The PGP effects were verified with black gram and Bhut jolokia in pot experiments. The isolate significantly increased the shoot length by 35.0% and root length by 58.0% of black gram, while 41.0% and 57.0% of elevation in shoot and root length were observed in Bhut jolokia compared to non-inoculated plants.

CONCLUSIONS: The EBRJ12 has PGP features that could improve the growth in host plants, and the genomic characterization revealed the presence of genetic potential for plant growth promotion.}, } @article {pmid36726175, year = {2023}, author = {Petersen, C and Sørensen, T and Nielsen, MR and Sondergaard, TE and Sørensen, JL and Fitzpatrick, DA and Frisvad, JC and Nielsen, KL}, title = {Comparative genomic study of the Penicillium genus elucidates a diverse pangenome and 15 lateral gene transfer events.}, journal = {IMA fungus}, volume = {14}, number = {1}, pages = {3}, pmid = {36726175}, issn = {2210-6340}, support = {NNF18OC0034952//Novo Nordisk Fonden/ ; }, abstract = {The Penicillia are known to produce a wide range of natural products—some with devastating outcome for the agricultural industry and others with unexploited potential in different applications. However, a large-scale overview of the biosynthetic potential of different species has been lacking. In this study, we sequenced 93 Penicillium isolates and, together with eleven published genomes that hold similar assembly characteristics, we established a species phylogeny as well as defining a Penicillium pangenome. A total of 5612 genes were shared between ≥ 98 isolates corresponding to approximately half of the average number of genes a Penicillium genome holds. We further identified 15 lateral gene transfer events that have occurred in this collection of Penicillium isolates, which might have played an important role, such as niche adaption, in the evolution of these fungi. 
The comprehensive characterization of the genomic diversity in the Penicillium genus supersedes single-reference genomes, which do not necessarily capture the entire genetic variation.}, } @article {pmid36718535, year = {2023}, author = {Lu, Y and Luo, J and An, E and Lu, B and Wei, Y and Chen, X and Lu, K and Liang, S and Hu, H and Han, M and He, S and Shen, J and Guo, D and Bu, N and Yang, L and Xu, W and Lu, C and Xiang, Z and Tong, X and Dai, F}, title = {Deciphering the Genetic Basis of Silkworm Cocoon Colors Provides New Insights into Biological Coloration and Phenotypic Diversification.}, journal = {Molecular biology and evolution}, volume = {40}, number = {2}, pages = {}, pmid = {36718535}, issn = {1537-1719}, mesh = {Animals ; *Bombyx/genetics/metabolism ; Silk/genetics/metabolism ; Base Sequence ; Flavonoids/metabolism ; }, abstract = {The genetic basis of phenotypic variation is a long-standing concern of evolutionary biology. Coloration has proven to be a visual, easily quantifiable, and highly tractable system for genetic analysis and is an ever-evolving focus of biological research. Compared with the homogenized brown-yellow cocoons of wild silkworms, the cocoons of domestic silkworms are spectacularly diverse in color, such as white, green, and yellow-red; this provides an outstanding model for exploring the phenotypic diversification and biological coloration. Herein, the molecular mechanism underlying silkworm green cocoon formation was investigated, which was not fully understood. We demonstrated that five of the seven members of a sugar transporter gene cluster were specifically duplicated in the Bombycidae and evolved new spatial expression patterns predominantly expressed in silk glands, accompanying complementary temporal expression; they synergistically facilitate the uptake of flavonoids, thus determining the green cocoon. 
Subsequently, polymorphic cocoon coloring landscape involving multiple loci and the evolution of cocoon color from wild to domestic silkworms were analyzed based on the pan-genome sequencing data. It was found that cocoon coloration involved epistatic interaction between loci; all the identified cocoon color-related loci existed in wild silkworms; the genetic segregation, recombination, and variation of these loci shaped the multicolored cocoons of domestic silkworms. This study revealed a new mechanism for flavonoids-based biological coloration that highlights the crucial role of gene duplication followed by functional diversification in acquiring new genetic functions; furthermore, the results in this work provide insight into phenotypic innovation during domestication.}, } @article {pmid36707768, year = {2023}, author = {Sun, Y and Xiao, W and Wang, QN and Wang, J and Kong, XD and Ma, WH and Liu, SX and Ren, P and Xu, LN and Zhang, YJ}, title = {Multiple variation patterns of terpene synthases in 26 maize genomes.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {46}, pmid = {36707768}, issn = {1471-2164}, mesh = {*Zea mays/genetics/metabolism ; Terpenes/metabolism ; *Alkyl and Aryl Transferases/genetics ; Plants/metabolism ; }, abstract = {Terpenoids are important compounds associated with the pest and herbivore resistance mechanisms of plants; consequently, it is essential to identify and explore terpene synthase (TPS) genes in maize. In the present study, we identified 31 TPS genes based on a pan-genome of 26 high-quality maize genomes containing 20 core genes (present in all 26 lines), seven dispensable genes (present in 2 to 23 lines), three near-core genes (present in 24 to 25 lines), and one private gene (present in only 1 line). Evaluation of ka/ks values of TPS in 26 varieties revealed that TPS25 was subjected to positive selection in some varieties. 
Six ZmTPS had ka/ks values less than 1, indicating that they were subjected to purifying selection. In 26 genomes, significant differences were observed in ZmTPS25 expression between genes affected by structural variation (SV) and those not affected by SV. In some varieties, SV altered the conserved structural domains resulting in a considerable number of atypical genes. The analysis of RNA-seq data of maize Ostrinia furnacalis feeding revealed 10 differentially expressed ZmTPS, 9 of which were core genes. However, many atypical genes for these responsive genes were identified in several genomes. These findings provide a novel resource for functional studies of ZmTPS.}, } @article {pmid36706753, year = {2023}, author = {Younginger, BS and Mayba, O and Reeder, J and Nagarkar, DR and Modrusan, Z and Albert, ML and Byrd, AL}, title = {Enrichment of oral-derived bacteria in inflamed colorectal tumors and distinct associations of Fusobacterium in the mesenchymal subtype.}, journal = {Cell reports. Medicine}, volume = {4}, number = {2}, pages = {100920}, pmid = {36706753}, issn = {2666-3791}, mesh = {Humans ; *Colorectal Neoplasms/genetics ; Fusobacterium/genetics ; Microsatellite Instability ; Transcriptome ; }, abstract = {While the association between colorectal cancer (CRC) features and Fusobacterium has been extensively studied, less is known of other intratumoral bacteria. Here, we leverage whole transcriptomes from 807 CRC samples to dually characterize tumor gene expression and 74 intratumoral bacteria. Seventeen of these species, including 4 Fusobacterium spp., are classified as orally derived and are enriched among right-sided, microsatellite instability-high (MSI-H), and BRAF-mutant tumors. Across consensus molecular subtypes (CMSs), integration of Fusobacterium animalis (Fa) presence and tumor expression reveals that Fa has the most significant associations in mesenchymal CMS4 tumors despite a lower prevalence than in immune CMS1. 
Within CMS4, the prevalence of Fa is uniquely associated with collagen- and immune-related pathways. Additional Fa pangenome analysis reveals that stress response genes and the adhesion FadA are commonly expressed intratumorally. Overall, this study identifies oral-derived bacteria as enriched in inflamed tumors, and the associations of bacteria and tumor expression are context and species specific.}, } @article {pmid36703158, year = {2023}, author = {Wang, J and Yang, W and Zhang, S and Hu, H and Yuan, Y and Dong, J and Chen, L and Ma, Y and Yang, T and Zhou, L and Chen, J and Liu, B and Li, C and Edwards, D and Zhao, J}, title = {A pangenome analysis pipeline provides insights into functional gene identification in rice.}, journal = {Genome biology}, volume = {24}, number = {1}, pages = {19}, pmid = {36703158}, issn = {1474-760X}, mesh = {*Oryza/genetics ; Genome-Wide Association Study ; Genomics/methods ; Genome ; Computational Biology ; }, abstract = {BACKGROUND: A pangenome aims to capture the complete genetic diversity within a species and reduce bias in genetic analysis inherent in using a single reference genome. However, the current linear format of most plant pangenomes limits the presentation of position information for novel sequences. Graph pangenomes have been developed to overcome this limitation. However, bioinformatics analysis tools for graph format genomes are lacking.

RESULTS: To overcome this problem, we develop a novel strategy for pangenome construction and a downstream pangenome analysis pipeline (PSVCP) that captures genetic variants' position information while maintaining a linearized layout. Using PSVCP, we construct a high-quality rice pangenome using 12 representative rice genomes and analyze an international rice panel with 413 diverse accessions using the pangenome as the reference. We show that PSVCP successfully identifies causal structural variations for rice grain weight and plant height. Our results provide insights into rice population structure and genomic diversity. We characterize a new locus (qPH8-1) associated with plant height on chromosome 8 undetected by the SNP-based genome-wide association study (GWAS).

CONCLUSIONS: Our results demonstrate that the pangenome constructed by our pipeline combined with a presence and absence variation-based GWAS can provide additional power for genomic and genetic analysis. The pangenome constructed in this study and the associated genome sequence and genetic variants data provide valuable genomic resources for rice genomics research and improvement in future.}, } @article {pmid36699832, year = {2022}, author = {Lee, G and Choi, H and Liu, H and Han, YH and Paul, NC and Han, GH and Kim, H and Kim, PI and Seo, SI and Song, J and Sang, H}, title = {Biocontrol of the causal brown patch pathogen Rhizoctonia solani by Bacillus velezensis GH1-13 and development of a bacterial strain specific detection method.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1091030}, pmid = {36699832}, issn = {1664-462X}, abstract = {Brown patch caused by the basidiomycete fungus Rhizoctonia solani is an economically important disease of cool-season turfgrasses. In order to manage the disease, different types of fungicides have been applied, but the negative impact of fungicides on the environment continues to rise. In this study, the beneficial bacteria Bacillus velezensis GH1-13 was characterized as a potential biocontrol agent to manage brown patch disease. The strain GH1-13 strongly inhibited the mycelial growth of turf pathogens including different anastomosis groups of R. solani causing brown patch and large patch. R. solani AG2-2(IIIB) hyphae were morphologically changed, and fungal cell death resulted from exposure to the strain GH1-13. In addition, the compatibility of fungicides with the bacterial strain, and the combined application of fungicide azoxystrobin and the strain in brown patch control on creeping bentgrass indicated that the strain could serve as a biocontrol agent. 
To develop strain-specific detection method, two unique genes from chromosome and plasmid of GH1-13 were found using pan-genome analysis of 364 Bacillus strains. The unique gene from chromosome was successfully detected using both SYBR Green and TaqMan qPCR methods in bacterial DNA or soil DNA samples. This study suggests that application of GH1-13 offers an environmentally friendly approach via reducing fungicide application rates. Furthermore, the developed pipeline of strain-specific detection method could be a useful tool for detecting and studying the dynamics of specific biocontrol agents.}, } @article {pmid36699320, year = {2022}, author = {Hanafy, M and Hansen, C and Phanse, Y and Wu, CW and Nelson, K and Aschenbroich, SA and Talaat, AM}, title = {Characterization of early immune responses elicited by live and inactivated vaccines against Johne's disease in goats.}, journal = {Frontiers in veterinary science}, volume = {9}, number = {}, pages = {1046704}, pmid = {36699320}, issn = {2297-1769}, abstract = {Mycobacterium avium subspecies paratuberculosis (M. paratuberculosis) is the causative agent of Johne's disease, a chronic debilitating condition affecting ruminants causing significant economic losses to the dairy industry. Available inactivated vaccines are not effective in controlling the disease and vaccinated animals can continue to infect newly born calves. Recently, we have shown that a live-attenuated vaccine candidate (pgsN) is protective in goats and calves following challenge with virulent strains of M. paratuberculosis. To decipher the dynamics of the immune responses elicited by both live-attenuated and inactivated vaccines, we analyzed key immunological parameters of goats immunized through different routes when a marker-less pgsN vaccine was used. 
Within a few weeks, the inactivated vaccine triggered the formation of granulomas both at the site of inoculation and in regional lymph nodes, that increased in size over time and persisted until the end of the experiment. In contrast, granulomas induced by the pgsN vaccine were small and subsided during the study. Interestingly, in this vaccine group, histology demonstrated an initial abundance of intra-histiocytic mycobacterial bacilli at the site of inoculation, with recruitment of very minimal T lymphocytes to poorly organized granulomas. Over time, granulomas became more organized, with recruitment of greater numbers of T and B lymphocytes, which coincided with a lack of mycobacteria. For the inactivated vaccine group, mycobacterial bacilli were identified extracellularly within the center of caseating granulomas, with relatively equal proportions of B- and T-lymphocytes maintained across both early and late times. Despite the differences in granuloma-specific lymphocyte recruitment, markers for cell-mediated immunity (e.g., IFN-γ release) were robust in both injected pgsN and inactivated vaccine groups. In contrast, the intranasal live-attenuated vaccine did not elicit any reaction at site of inoculation, nor cell-mediated immune responses. Finally, 80% of animals in the inactivated vaccine group significantly reacted to purified protein derivatives from M. bovis, while reactivity was detected in only 20% of animals receiving pgsN vaccine, suggesting a higher level of cross reactivity for bovine tuberculosis when inactivated vaccine is used. 
Overall, these results depict the cellular recruitment strategies driving immune responses elicited by both live-attenuated and inactivated vaccines that target Johne's disease.}, } @article {pmid36698972, year = {2023}, author = {Yang, MR and Wu, YW}, title = {A Cross-Validated Feature Selection (CVFS) approach for extracting the most parsimonious feature sets and discovering potential antimicrobial resistance (AMR) biomarkers.}, journal = {Computational and structural biotechnology journal}, volume = {21}, number = {}, pages = {769-779}, pmid = {36698972}, issn = {2001-0370}, abstract = {Understanding genes and their underlying mechanisms is critical in deciphering how antimicrobial-resistant (AMR) bacteria withstand detrimental effects of antibiotic drugs. At the same time the genes related to AMR phenotypes may also serve as biomarkers for predicting whether a microbial strain is resistant to certain antibiotic drugs. We developed a Cross-Validated Feature Selection (CVFS) approach for robustly selecting the most parsimonious gene sets for predicting AMR activities from bacterial pan-genomes. The core idea behind the CVFS approach is interrogating features among non-overlapping sub-parts of the datasets to ensure the representativeness of the features. By randomly splitting the dataset into disjoint sub-parts, conducting feature selection within each sub-part, and intersecting the features shared by all sub-parts, the CVFS approach is able to achieve the goal of extracting the most representative features for yielding satisfactory AMR activity prediction accuracy. By testing this idea on bacterial pan-genome datasets, we showed that this approach was able to extract the most succinct feature sets that predicted AMR activities very well, indicating the potential of these genes as AMR biomarkers. 
The functional analysis demonstrated that the CVFS approach was able to extract both known AMR genes and novel ones, suggesting the capabilities of the algorithm in selecting relevant features and highlighting the potential of the novel genes in expanding the antimicrobial resistance gene databases.}, } @article {pmid36698060, year = {2023}, author = {Sivakumar, R and Pranav, PS and Annamanedi, M and Chandrapriya, S and Isloor, S and Rajendhran, J and Hegde, NR}, title = {Genome sequencing and comparative genomic analysis of bovine mastitis-associated Staphylococcus aureus strains from India.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {44}, pmid = {36698060}, issn = {1471-2164}, mesh = {Animals ; Cattle ; Female ; Humans ; Anti-Bacterial Agents ; Genomics ; *Mastitis, Bovine/epidemiology/microbiology ; Multilocus Sequence Typing ; Phylogeny ; *Staphylococcal Infections/microbiology/veterinary ; *Staphylococcus aureus/genetics ; *Genome, Bacterial ; India ; }, abstract = {BACKGROUND: Bovine mastitis accounts for significant economic losses to the dairy industry worldwide. Staphylococcus aureus is the most common causative agent of bovine mastitis. Investigating the prevalence of virulence factors and antimicrobial resistance would provide insight into the molecular epidemiology of mastitis-associated S. aureus strains. The present study is focused on the whole genome sequencing and comparative genomic analysis of 41 mastitis-associated S. aureus strains isolated from India.

RESULTS: The results elucidate explicit knowledge of 15 diverse sequence types (STs) and five clonal complexes (CCs). The clonal complexes CC8 and CC97 were found to be the predominant genotypes comprising 21 and 10 isolates, respectively. The mean genome size was 2.7 Mbp with a 32.7% average GC content. The pan-genome of the Indian strains of mastitis-associated S. aureus is almost closed. The genome-wide SNP-based phylogenetic analysis differentiated 41 strains into six major clades. Sixteen different spa types were identified, and eight isolates were untypeable. The cgMLST analysis of all S. aureus genome sequences reported from India revealed that S. aureus strain MUF256, isolated from wound fluids of a diabetic patient, was the common ancestor. Further, we observed that all the Indian mastitis-associated S. aureus isolates belonging to the CC97 are mastitis-associated. We identified 17 different antimicrobial resistance (AMR) genes among these isolates, and all the isolates used in this study were susceptible to methicillin. We also identified 108 virulence-associated genes and discuss their associations with different genotypes.

CONCLUSION: This is the first study presenting a comprehensive whole genome analysis of bovine mastitis-associated S. aureus isolates from India. Comparative genomic analysis revealed the genome diversity, major genotypes, antimicrobial resistome, and virulome of clinical and subclinical mastitis-associated S. aureus strains.}, } @article {pmid36695592, year = {2023}, author = {Giacomini, JJ and Torres-Morales, J and Dewhirst, FE and Borisy, GG and Mark Welch, JL}, title = {Site Specialization of Human Oral Veillonella Species.}, journal = {Microbiology spectrum}, volume = {11}, number = {1}, pages = {e0404222}, pmid = {36695592}, issn = {2165-0497}, support = {R01 DE016937/DE/NIDCR NIH HHS/United States ; R01 DE022586/DE/NIDCR NIH HHS/United States ; R01 DE030136/DE/NIDCR NIH HHS/United States ; }, mesh = {Humans ; *Veillonella/genetics ; Mouth/microbiology ; Tongue/microbiology ; Palatine Tonsil ; *Microbiota ; }, abstract = {Veillonella species are abundant members of the human oral microbiome with multiple interspecies commensal relationships. Examining the distribution patterns of Veillonella species across the oral cavity is fundamental to understanding their oral ecology. In this study, we used a combination of pangenomic analysis and oral metagenomic information to clarify Veillonella taxonomy and to test the site specialist hypothesis for the Veillonella genus, which contends that most oral bacterial species are adapted to live at specific oral sites. Using isolate genome sequences combined with shotgun metagenomic sequence data, we showed that Veillonella species have clear, differential site specificity: Veillonella parvula showed strong preference for supra- and subgingival plaque, while closely related V. dispar, as well as more distantly related V. atypica, preferred the tongue dorsum, tonsils, throat, and hard palate. In addition, the provisionally named Veillonella sp. Human Microbial Taxon 780 showed strong site specificity for keratinized gingiva. 
Using comparative genomic analysis, we identified genes associated with thiamine biosynthesis and the reductive pentose phosphate cycle that may enable Veillonella species to occupy their respective habitats. IMPORTANCE Understanding the microbial ecology of the mouth is fundamental for understanding human physiology. In this study, metapangenomics demonstrated that different Veillonella species have clear ecological preferences in the oral cavity of healthy humans, validating the site specialist hypothesis. Furthermore, the gene pool of different Veillonella species was found to be reflective of their ecology, illuminating the potential role of vitamins and carbohydrates in determining Veillonella distribution patterns and interspecies interactions.}, } @article {pmid36693839, year = {2023}, author = {Gao, Y and Guitton-Sert, L and Dessapt, J and Coulombe, Y and Rodrigue, A and Milano, L and Blondeau, A and Larsen, NB and Duxin, JP and Hussein, S and Fradet-Turcotte, A and Masson, JY}, title = {A CRISPR-Cas9 screen identifies EXO1 as a formaldehyde resistance gene.}, journal = {Nature communications}, volume = {14}, number = {1}, pages = {381}, pmid = {36693839}, issn = {2041-1723}, mesh = {Humans ; *CRISPR-Cas Systems ; DNA ; DNA Damage/drug effects/genetics ; DNA Repair/drug effects/genetics ; DNA Repair Enzymes/genetics/metabolism ; DNA Replication/drug effects/genetics ; *Exodeoxyribonucleases/genetics/metabolism ; *Fanconi Anemia/chemically induced/genetics ; *Formaldehyde/toxicity ; Genomic Instability/drug effects/genetics ; *Drug Tolerance/genetics ; }, abstract = {Fanconi Anemia (FA) is a rare, genome instability-associated disease characterized by a deficiency in repairing DNA crosslinks, which are known to perturb several cellular processes, including DNA transcription, replication, and repair. Formaldehyde, a by-product of metabolism, is thought to drive FA by generating DNA interstrand crosslinks (ICLs) and DNA-protein crosslinks (DPCs). 
However, the impact of formaldehyde on global cellular pathways has not been investigated thoroughly. Herein, using a pangenomic CRISPR-Cas9 screen, we identify EXO1 as a critical regulator of formaldehyde-induced DNA lesions. We show that EXO1 knockout cell lines exhibit formaldehyde sensitivity leading to the accumulation of replicative stress, DNA double-strand breaks, and quadriradial chromosomes, a typical feature of FA. After formaldehyde exposure, EXO1 is recruited to chromatin, protects DNA replication forks from degradation, and functions in parallel with the FA pathway to promote cell survival. In vitro, EXO1-mediated exonuclease activity is proficient in removing DPCs. Collectively, we show that EXO1 limits replication stress and DNA damage to counteract formaldehyde-induced genome instability.}, } @article {pmid36691844, year = {2023}, author = {Hu, J and Chen, L and Li, G and Pan, Y and Lu, Y and Chen, J and Xiong, W and Zeng, Z}, title = {Prevalence and genetic characteristics of fosB-positive Staphylococcus aureus in duck farms in Guangdong, China in 2020.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {78}, number = {3}, pages = {802--809}, doi = {10.1093/jac/dkad014}, pmid = {36691844}, issn = {1460-2091}, mesh = {Animals ; Humans ; Staphylococcus aureus/genetics ; Anti-Bacterial Agents ; Ducks ; Farms ; Prevalence ; Microbial Sensitivity Tests ; *Staphylococcal Infections/microbiology ; China/epidemiology ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Proto-Oncogene Proteins c-fos ; }, abstract = {OBJECTIVES: To investigate the epidemiology of fosB-positive Staphylococcus aureus in waterfowl farms in the Pearl River tributaries in Guangdong Province, China in 2020.

METHODS: A total of 63 S. aureus were recovered from 315 samples collected from six duck farms and one goose farm. PFGE, WGS and analysis were performed on 19 fosB-positive S. aureus.

RESULTS: The fosfomycin resistance rate of the strains was as high as 52.4% (33/63), and 30.1% (19/63) of the strains carried fosB. Resistance gene prediction results showed that duck farm environment-derived strains contained the oxazolidinone drug resistance gene optrA. All fosB-positive S. aureus were MRSA and most of them were MDR, mainly ST9-t899 and ST164-t899. PFGE showed that fosB-positive S. aureus from humans and ducks could be clustered into the same clade. In addition, core-genome SNP analysis showed that clonal transmission of S. aureus occurred between humans and water. Pan-genome analysis showed that S. aureus had an open pangenome. The fosB gene was located on 2610-2615 bp plasmids, which all contained a broad host-range plasmid replication protein family 13. Small plasmids carrying the fosB gene could be found in different multilocus STs of S. aureus.

CONCLUSIONS: This study indicated that duck farms in Guangdong, China could be an important reservoir of fosB-positive S. aureus. The spread of drug-resistant bacteria in waterfowl farms requires further monitoring.}, } @article {pmid36688776, year = {2023}, author = {Basak, C and Chakraborty, R}, title = {A novel strain of Shigella isolated from the gut of Lepidocephalichthys guntea has in its genome a complete gene package for Type II secretion system, and elaborate repertoire of genes responsible for multiple antibiotic-resistance and metal resistance via specific efflux channels.}, journal = {Letters in applied microbiology}, volume = {76}, number = {1}, pages = {}, doi = {10.1093/lambio/ovac049}, pmid = {36688776}, issn = {1472-765X}, support = {//Bangladesh Council of Scientific and Industrial Research/ ; }, mesh = {Animals ; *Anti-Bacterial Agents/pharmacology ; *Drug Resistance, Multiple, Bacterial/genetics ; Escherichia coli ; Genome, Bacterial ; Membrane Transport Proteins/genetics ; Multilocus Sequence Typing ; *Operon ; *Shigella/classification ; }, abstract = {The bacterial strain GCP5 was isolated from the gut of a bottom-dwelling fish Lepidocephalichthys guntea, that lives in the Magurmari River near North Bengal University in Siliguri, India. GCP5 was phylogenetically assigned to the Shigella genus using whole genome-based trees, k-mer analysis, the multilocus species tree (MLST), and single nucleotide polymorphism (SNP)-based trees, and the genetic makeup of the isolate was determined following assembly of the genome sequences and genome annotation with several bioinformatics tools. The presence of a complete package of general-secretory-pathway (gsp) genes, grouped in an operon identical to a well-characterized type II secretion system (T2SS), was confirmed by genome mining of Shigella sp. GCP5. The operon's gsp genes shared the most homology with Escherichia coli gsp genes. 
A few more high-pathogenicity islands (HPIs) in the GCP5 genome were validated using the pan-genomes analysis pipeline (PGAP) and island viewer. Several antibiotic-resistance genes were found in this genome, as well as the existence of key antibiotic efflux pump families, allowing for the creation of a gene network of several antibiotic efflux transporters. In addition, the genome contained genes specific for nickel transport, the nikABCD system, and the RND family transporter cusCFBA, which confers resistance to copper and silver by effluxing out Cu+ and Ag+ ions.}, } @article {pmid36687647, year = {2022}, author = {Zhang, M and Yu, Y and Wang, Q and Chen, R and Wang, Y and Bai, Y and Song, Z and Lu, X and Hao, Y}, title = {Conjugation of plasmid harboring bla NDM-1 in a clinical Providencia rettgeri strain through the formation of a fusion plasmid.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1071385}, pmid = {36687647}, issn = {1664-302X}, abstract = {Providencia rettgeri has recently gained increased importance owing to the New Delhi metallo-β-lactamase (NDM) and other β-lactamases produced by its clinical isolates. These enzymes reduce the efficiency of antimicrobial therapy. Herein, we reported the findings of whole-genome sequence analysis and a comprehensive pan-genome analysis performed on a multidrug-resistant P. rettgeri 18004577 clinical strain recovered from the urine of a hospitalized patient in Shandong, China, in 2018. Providencia rettgeri 18004577 was found to have a genome assembly size of 4.6 Mb with a G + C content of 41%; a circular plasmid p18004577_NDM of 273.3 Kb, harboring an accessory multidrug-resistant region; and a circular, stable IncT plasmid p18004577_Rts of 146.2 Kb. Additionally, various resistance genes were identified in its genome, including bla NDM-1, bla OXA-10, bla PER-4, aph(3')-VI, ant(2'')-Ia, ant(3')-Ia, sul1, catB8, catA1, mph(E), and tet. 
Conjugation experiments and whole-genome sequencing revealed that the bla NDM-1 gene could be transferred to the transconjugant via the formation of pJ18004577_NDM, a novel hybrid plasmid. Based on the genetic comparison, the main possible formation process for pJ18004577_NDM was the insertion of the [ΔISKox2-IS26-ΔISKox2]-aph(3')-VI-bla NDM-1 translocatable unit module from p18004577_NDM into plasmid p18004577_Rts in the Russian doll insertion structure (ΔISKox2-IS26-ΔISKox2), which played a role similar to that of IS26 using the "copy-in" route in the mobilization of [aph(3')-VI]-bla NDM-1. The array, multiplicity, and diversity of the resistance and virulence genes in this strain necessitate stringent infection control, antibiotic stewardship, and periodic resistance surveillance/monitoring policies to preempt further horizontal and vertical spread of the resistance genes. Roary analysis based on 30 P. rettgeri strains pan genome identified 415 core, 756 soft core, 5,744 shell, and 12,967 cloud genes, highlighting the "close" nature of P. rettgeri pan-genome. After a comprehensive pan-genome analysis, representative biological information was revealed that included phylogenetic distances, presence or absence of genes across the P. rettgeri bacteria clade, and functional distribution of proteins. Moreover, pan-genome analysis has been shown to be an effective approach to better understand P. 
rettgeri bacteria because it helps develop various tailored therapeutic strategies based on their biological similarities and differences.}, } @article {pmid36687645, year = {2022}, author = {Hurtado-Páez, U and Álvarez Zuluaga, N and Arango Isaza, RE and Contreras-Moreira, B and Rouzaud, F and Robledo, J}, title = {Pan-genome association study of Mycobacterium tuberculosis lineage-4 revealed specific genes related to the high and low prevalence of the disease in patients from the North-Eastern area of Medellín, Colombia.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1076797}, pmid = {36687645}, issn = {1664-302X}, abstract = {Mycobacterium tuberculosis (Mtb) lineage 4 is responsible for the highest burden of tuberculosis (TB) worldwide. This lineage has been the most prevalent lineage in Colombia, especially in the North-Eastern (NE) area of Medellin, where it has been shown to have a high prevalence of LAM9 SIT42 and Haarlem1 SIT62 sublineages. There is evidence that regardless of environmental factors and host genetics, differences among sublineages of Mtb strains play an important role in the course of infection and disease. Nevertheless, the genetic basis of the success of a sublineage in a specific geographic area remains uncertain. We used a pan-genome-wide association study (pan-GWAS) of 47 Mtb strains isolated from NE Medellin between 2005 and 2008 to identify the genes responsible for the phenotypic differences among high and low prevalence sublineages. Our results allowed the identification of 12 variants in 11 genes, of which 4 genes showed the strongest association to low prevalence (mmpL12, PPE29, Rv1419, and Rv1762c). The first three have been described as necessary for invasion and intracellular survival. Polymorphisms identified in low prevalence isolates may be related to a fitness cost of Mtb, which might reflect a decrease in their capacity to be transmitted or to cause an active infection. 
These results contribute to understanding the success of some sublineages of lineage-4 in a specific geographical area.}, } @article {pmid36687572, year = {2022}, author = {Robinson, LA and Collins, ACZ and Murphy, RA and Davies, JC and Allsopp, LP}, title = {Diversity and prevalence of type VI secretion system effectors in clinical Pseudomonas aeruginosa isolates.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1042505}, pmid = {36687572}, issn = {1664-302X}, abstract = {Pseudomonas aeruginosa is an opportunistic pathogen and a major driver of morbidity and mortality in people with Cystic Fibrosis (CF). The Type VI secretion system (T6SS) is a molecular nanomachine that translocates effectors across the bacterial membrane into target cells or the extracellular environment enabling intermicrobial interaction. P. aeruginosa encodes three T6SS clusters, the H1-, H2- and H3-T6SS, and numerous orphan islands. Genetic diversity of T6SS-associated effectors in P. aeruginosa has been noted in reference strains but has yet to be explored in clinical isolates. Here, we perform a comprehensive bioinformatic analysis of the pangenome and T6SS effector genes in 52 high-quality clinical P. aeruginosa genomes isolated from CF patients and housed in the Personalised Approach to P. aeruginosa strain repository. We confirm that the clinical CF isolate pangenome is open and principally made up of accessory and unique genes that may provide strain-specific advantages. We observed genetic variability in some effector/immunity encoding genes and show that several well-characterised vgrG and PAAR islands are absent from numerous isolates. Our analysis shows clear evidence of disruption to T6SS genomic loci through transposon, prophage, and mobile genetic element insertions. We identified an orphan vgrG island in P. 
aeruginosa strain PAK and five clinical isolates using in silico analysis which we denote vgrG7, predicting a gene within this cluster to encode a Tle2 lipase family effector. Close comparison of T6SS loci in clinical isolates compared to reference P. aeruginosa strain PAO1 revealed the presence of genes encoding eight new T6SS effectors with the following putative functions: cytidine deaminase, lipase, metallopeptidase, NADase, and pyocin. Finally, the prevalence of characterised and putative T6SS effectors were assessed in 532 publicly available P. aeruginosa genomes, which suggests the existence of accessory effectors. Our in silico study of the P. aeruginosa T6SS exposes a level of genetic diversity at T6SS genomic loci not seen to date within P. aeruginosa, particularly in CF isolates. As understanding the effector repertoire is key to identifying the targets of T6SSs and its efficacy, this comprehensive analysis provides a path for future experimental characterisation of these mediators of intermicrobial competition and host manipulation.}, } @article {pmid36685843, year = {2022}, author = {Stuart, KC and Sherwin, WB and Edwards, RJ and Rollins, LA}, title = {Evolutionary genomics: Insights from the invasive European starlings.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {1010456}, pmid = {36685843}, issn = {1664-8021}, abstract = {Two fundamental questions for evolutionary studies are the speed at which evolution occurs, and the way that this evolution may present itself within an organism's genome. Evolutionary studies on invasive populations are poised to tackle some of these pressing questions, including understanding the mechanisms behind rapid adaptation, and how it facilitates population persistence within a novel environment. 
Investigation of these questions is assisted through recent developments in experimental, sequencing, and analytical protocols; in particular, the growing accessibility of next generation sequencing has enabled a broader range of taxa to be characterised. In this perspective, we discuss recent genetic findings within the invasive European starlings in Australia, and outline some critical next steps within this research system. Further, we use discoveries within this study system to guide discussion of pressing future research directions more generally within the fields of population and evolutionary genetics, including the use of historic specimens, phenotypic data, non-SNP genetic variants (e.g., structural variants), and pan-genomes. In particular, we emphasise the need for exploratory genomics studies across a range of invasive taxa so we can begin understanding broad mechanisms that underpin rapid adaptation in these systems. Understanding how genetic diversity arises and is maintained in a population, and how this contributes to adaptability, requires a deep understanding of how evolution functions at the molecular level, and is of fundamental importance for the future studies and preservation of biodiversity across the globe.}, } @article {pmid36685320, year = {2023}, author = {Liew, KJ and Zakaria, MR and Hong, CWL and Tan, MCY and Chong, CS}, title = {Draft genome sequence of Joostella atrarenae M1-2[T] with cellulolytic and hemicellulolytic ability.}, journal = {3 Biotech}, volume = {13}, number = {2}, pages = {50}, pmid = {36685320}, issn = {2190-572X}, abstract = {The halophilic genus Joostella is one of the least-studied genera in the family of Flavobacteriaceae. So far, only two species have been taxonomically identified, and only limited genomic analysis in the aspect of application has been reported. Joostella atrarenae M1-2[T] was previously isolated from a seashore sample and it is the second discovered species of the genus Joostella. 
In this project, the genome of J. atrarenae M1-2[T] was sequenced using NovaSeq 6000. The final assembled genome is comprised of 71 contigs, a total of 3,983,942 bp, a GC ratio of 33.2%, and encoded for 3,416 genes. The 16S rRNA gene sequence of J. atrarenae M1-2[T] shows 97.3% similarity against J. marina DSM 19592[T]. Genome-genome comparison between the two strains by ANI, dDDH, AAI, and POCP shows values of 80.8%, 23.3%, 83.4%, and 74.1% respectively. Pan-genome analysis shows that strain M1-2[T] and J. marina DSM 19592[T] shared a total of 248 core genes. Taken together, strain M1-2[T] and J. marina DSM 19592[T] belong to the same genus but are two different species. CAZymes analysis revealed that strain M1-2[T] harbors 109 GHs, 40 GTs, 5 PLs, 9 CEs, and 6 AAs. Among these CAZymes, while 5 genes are related to cellulose degradation, 12 and 24 genes are found to encode for xylanolytic enzymes and other hemicellulases that involve majorly in the side chain removal of the lignocellulose structure, respectively. Furthermore, both the intracellular and extracellular crude extracts of strain M1-2[T] exhibited enzymatic activities against CMC, xylan, pNPG, and pNPX substrates, which correspond to endoglucanase, xylanase, β-glucosidase, and β-xylosidase, respectively. Collectively, description of genome coupled with the enzyme assay results demonstrated that J. 
atrarenae M1-2[T] has a role in lignocellulosic biomass degradation, and the strain could be useful for lignocellulosic biorefining.}, } @article {pmid36684744, year = {2022}, author = {Voelker, WG and Krishnan, K and Chougule, K and Alexander, LC and Lu, Z and Olson, A and Ware, D and Songsomboon, K and Ponce, C and Brenton, ZW and Boatwright, JL and Cooper, EA}, title = {Ten new high-quality genome assemblies for diverse bioenergy sorghum genotypes.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1040909}, pmid = {36684744}, issn = {1664-462X}, abstract = {INTRODUCTION: Sorghum (Sorghum bicolor (L.) Moench) is an agriculturally and economically important staple crop that has immense potential as a bioenergy feedstock due to its relatively high productivity on marginal lands. To capitalize on and further improve sorghum as a potential source of sustainable biofuel, it is essential to understand the genomic mechanisms underlying complex traits related to yield, composition, and environmental adaptations.

METHODS: Expanding on a recently developed mapping population, we generated de novo genome assemblies for 10 parental genotypes from this population and identified a comprehensive set of over 24 thousand large structural variants (SVs) and over 10.5 million single nucleotide polymorphisms (SNPs).

RESULTS: We show that SVs and nonsynonymous SNPs are enriched in different gene categories, emphasizing the need for long read sequencing in crop species to identify novel variation. Furthermore, we highlight SVs and SNPs occurring in genes and pathways with known associations to critical bioenergy-related phenotypes and characterize the landscape of genetic differences between sweet and cellulosic genotypes.

DISCUSSION: These resources can be integrated into both ongoing and future mapping and trait discovery for sorghum and its myriad uses including food, feed, bioenergy, and increasingly as a carbon dioxide removal mechanism.}, } @article {pmid36683686, year = {2022}, author = {Bai, Z and Zhang, N and Jin, Y and Chen, L and Mao, Y and Sun, L and Fang, F and Liu, Y and Han, M and Li, G}, title = {Comprehensive analysis of 84 Faecalibacterium prausnitzii strains uncovers their genetic diversity, functional characteristics, and potential risks.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {919701}, pmid = {36683686}, issn = {2235-2988}, mesh = {Humans ; *Faecalibacterium prausnitzii/genetics/metabolism ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Probiotics ; Genetic Variation ; }, abstract = {Faecalibacterium prausnitzii is a beneficial human gut microbe and a candidate for next-generation probiotics. With probiotics now being used in clinical treatments, concerns about their safety and side effects need to be considered. Therefore, it is essential to obtain a comprehensive understanding of the genetic diversity, functional characteristics, and potential risks of different F. prausnitzii strains. In this study, we collected the genetic information of 84 F. prausnitzii strains to conduct a pan-genome analysis with multiple perspectives. Based on single-copy genes and the sequences of 16S rRNA and the compositions of the pan-genome, different phylogenetic analyses of F. prausnitzii strains were performed, which showed the genetic diversity among them. Among the proteins of the pan-genome, we found that the accessory clusters made a greater contribution to the primary genetic functions of F. prausnitzii strains than the core and specific clusters. The functional annotations of F. 
prausnitzii showed that only a very small number of proteins were related to human diseases and there were no secondary metabolic gene clusters encoding harmful products. At the same time, complete fatty acid metabolism was detected in F. prausnitzii. In addition, we detected harmful elements, including antibiotic resistance genes, virulence factors, and pathogenic genes, and proposed the probiotic potential risk index (PPRI) and probiotic potential risk score (PPRS) to classify these 84 strains into low-, medium-, and high-risk groups. Finally, 15 strains were identified as low-risk strains and prioritized for clinical application. Undoubtedly, our results provide a comprehensive understanding and insight into F. prausnitzii, and PPRI and PPRS can be applied to evaluate the potential risks of probiotics in general and to guide the application of probiotics in clinical application.}, } @article {pmid36678781, year = {2022}, author = {Khan, MA and Amin, A and Farid, A and Ullah, A and Waris, A and Shinwari, K and Hussain, Y and Alsharif, KF and Alzahrani, KJ and Khan, H}, title = {Recent Advances in Genomics-Based Approaches for the Development of Intracellular Bacterial Pathogen Vaccines.}, journal = {Pharmaceutics}, volume = {15}, number = {1}, pages = {}, pmid = {36678781}, issn = {1999-4923}, abstract = {Infectious diseases continue to be a leading cause of morbidity and mortality worldwide. The majority of infectious diseases are caused by intracellular pathogenic bacteria (IPB). Historically, conventional vaccination drives have helped control the pathogenesis of intracellular bacteria and the emergence of antimicrobial resistance, saving millions of lives. However, in light of various limitations, many diseases that involve IPB still do not have adequate vaccines. 
In response to increasing demand for novel vaccine development strategies, a new area of vaccine research emerged following the advent of genomics technology, which changed the paradigm of vaccine development by utilizing the complete genomic data of microorganisms against them. It became possible to identify genes related to disease virulence, genetic patterns linked to disease virulence, as well as the genetic components that supported immunity and favorable vaccine responses. Complete genomic databases, and advancements in transcriptomics, metabolomics, structural genomics, proteomics, immunomics, pan-genomics, synthetic genomics, and population biology have allowed researchers to identify potential vaccine candidates and predict their effects in patients. New vaccines have been created against diseases for which previously there were no vaccines available, and existing vaccines have been improved. This review highlights the key issues and explores the evolution of vaccines. The increasing volume of IPB genomic data, and their application in novel genome-based techniques for vaccine development, were also examined, along with their characteristics, and the opportunities and obstacles involved. Critically, the application of genomics technology has helped researchers rapidly select and evaluate candidate antigens. 
Novel vaccines capable of addressing the limitations associated with conventional vaccines have been developed and pressing healthcare issues are being addressed.}, } @article {pmid36677470, year = {2023}, author = {Charles, C and Conde, C and Vorimore, F and Cochard, T and Michelet, L and Boschiroli, ML and Biet, F}, title = {Features of Mycobacterium bovis Complete Genomes Belonging to 5 Different Lineages.}, journal = {Microorganisms}, volume = {11}, number = {1}, pages = {}, pmid = {36677470}, issn = {2076-2607}, support = {773830//One Health European Joint Programme (OHEJP)/ ; }, abstract = {Mammalian tuberculosis (TB) is a zoonotic disease mainly due to Mycobacterium bovis (M. bovis). A current challenge for its eradication is understanding its transmission within multi-host systems. Improvements in long-read sequencing technologies have made it possible to obtain complete bacterial genomes that provide a comprehensive view of species-specific genomic features. In the context of TB, new genomic references based on complete genomes genetically close to field strains are also essential to perform precise field molecular epidemiological studies. A total of 10 M. bovis strains representing each genetic lineage identified in France and in other countries were selected for performing complete assembly of their genomes. Pangenome analysis revealed a "closed" pangenome composed of 3900 core genes and only 96 accessory genes. Whole genomes-based alignment using progressive Mauve showed remarkable conservation of the genomic synteny except that the genomes have a variable number of copies of IS6110. Characteristic genomic traits of each lineage were identified through the discovery of specific indels. Altogether, these results provide new genetic features that improve the description of M. bovis lineages. The availability of new complete representative genomes of M. 
bovis will be useful to epidemiological studies and better understand the transmission of this clonal-evolving pathogen.}, } @article {pmid36677411, year = {2023}, author = {Thakur, P and Alaba, MO and Rauniyar, S and Singh, RN and Saxena, P and Bomgni, A and Gnimpieba, EZ and Lushbough, C and Goh, KM and Sani, RK}, title = {Text-Mining to Identify Gene Sets Involved in Biocorrosion by Sulfate-Reducing Bacteria: A Semi-Automated Workflow.}, journal = {Microorganisms}, volume = {11}, number = {1}, pages = {}, pmid = {36677411}, issn = {2076-2607}, support = {P20 GM103443/GM/NIGMS NIH HHS/United States ; }, abstract = {A significant amount of literature is available on biocorrosion, which makes manual extraction of crucial information such as genes and proteins a laborious task. Despite the fast growth of biology related corrosion studies, there is a limited number of gene collections relating to the corrosion process (biocorrosion). Text mining offers a potential solution by automatically extracting the essential information from unstructured text. We present a text mining workflow that extracts biocorrosion associated genes/proteins in sulfate-reducing bacteria (SRB) from literature databases (e.g., PubMed and PMC). This semi-automatic workflow is built with the Named Entity Recognition (NER) method and Convolutional Neural Network (CNN) model. With PubMed and PMCID as inputs, the workflow identified 227 genes belonging to several Desulfovibrio species. To validate their functions, Gene Ontology (GO) enrichment and biological network analysis was performed using UniprotKB and STRING-DB, respectively. The GO analysis showed that metal ion binding, sulfur binding, and electron transport were among the principal molecular functions. 
Furthermore, the biological network analysis generated three interlinked clusters containing genes involved in metal ion binding, cellular respiration, and electron transfer, which suggests the involvement of the extracted gene set in biocorrosion. Finally, the dataset was validated through manual curation, yielding a similar set of genes as our workflow; among these, hysB and hydA, and sat and dsrB were identified as the metal ion binding and sulfur metabolism genes, respectively. The identified genes were mapped with the pangenome of 63 SRB genomes that yielded the distribution of these genes across 63 SRB based on the amino acid sequence similarity and were further categorized as core and accessory gene families. SRB's role in biocorrosion involves the transfer of electrons from the metal surface via a hydrogen medium to the sulfate reduction pathway. Therefore, genes encoding hydrogenases and cytochromes might be participating in removing hydrogen from the metals through electron transfer. Moreover, the production of corrosive sulfide from the sulfur metabolism indirectly contributes to the localized pitting of the metals. 
After the corroboration of text mining results with SRB biocorrosion mechanisms, we suggest that the text mining framework could be utilized for genes/proteins extraction and significantly reduce the manual curation time.}, } @article {pmid36677403, year = {2022}, author = {Romero-Calle, DX and Pedrosa-Silva, F and Tomé, LMR and Sousa, TJ and de Oliveira Santos, LTS and de Carvalho Azevedo, VA and Brenig, B and Benevides, RG and Venancio, TM and Billington, C and Góes-Neto, A}, title = {Hybrid Genomic Analysis of Salmonella enterica Serovar Enteritidis SE3 Isolated from Polluted Soil in Brazil.}, journal = {Microorganisms}, volume = {11}, number = {1}, pages = {}, pmid = {36677403}, issn = {2076-2607}, support = {001//Coordenação de Aperfeicoamento de Pessoal de Nível Superior/ ; }, abstract = {In Brazil, Salmonella enterica serovar Enteritidis is a significant health threat. Salmonella enterica serovar Enteritidis SE3 was isolated from soil at the Subaé River in Santo Amaro, Brazil, a region contaminated with heavy metals and organic waste. Illumina HiSeq and Oxford Nanopore Technologies MinION sequencing were used for de novo hybrid assembly of the Salmonella SE3 genome. This approach yielded 10 contigs with 99.98% identity with S. enterica serovar Enteritidis OLF-SE2-98984-6. Twelve Salmonella pathogenic islands, multiple virulence genes, multiple antimicrobial gene resistance genes, seven phage defense systems, seven prophages and a heavy metal resistance gene were encoded in the genome. Pangenome analysis of the S. enterica clade, including Salmonella SE3, revealed an open pangenome, with a core genome of 2137 genes. Our study showed the effectiveness of a hybrid sequence assembly approach for environmental Salmonella genome analysis using HiSeq and MinION data. 
This approach enabled the identification of key resistance and virulence genes, and these data are important to inform the control of Salmonella and heavy metal pollution in the Santo Amaro region of Brazil.}, } @article {pmid36677357, year = {2022}, author = {Myintzaw, P and Pennone, V and McAuliffe, O and Begley, M and Callanan, M}, title = {Variability in Cold Tolerance of Food and Clinical Listeria monocytogenes Isolates.}, journal = {Microorganisms}, volume = {11}, number = {1}, pages = {}, pmid = {36677357}, issn = {2076-2607}, support = {15F604 and 2019R495.//Department of Food Agriculture and the Marine, Ireland/ ; }, abstract = {The aim of this study was to investigate the level of strain variability amongst food and clinical Listeria monocytogenes isolates growing at low temperatures (4 and 7 °C) in both laboratory media and real food matrices. Isolates (n = 150) grown in laboratory media demonstrated a large variation in growth profiles measured using optical density. Overall, it was noted that clinical isolates exhibited a significantly higher growth rate (p ≤ 0.05) at 7 °C than the other isolates. Analysis of variance (ANOVA) tests of isolates grouped using Multi Locus Sequence Typing (MLST) revealed that clonal complex 18 (CC18) isolates were significantly (p ≤ 0.05) faster growing at 4 °C than other CC-type isolates while CC101, CC18, CC8, CC37 and CC14 were faster growing than other CC types at 7 °C. Euclidean distance and Ward method-based hierarchical clustering of mean growth rates classified 33.33% of isolates as faster growing. Fast and slow growing representative isolates were selected from the cluster analysis and growth rates were determined using plate count data in laboratory media and model food matrices. In agreement with the optical density experiments, CC18 isolates were faster and CC121 isolates were slower than other CC types in laboratory media, UHT milk and fish pie. 
The same trend was observed in chocolate milk but the differences were not statistically significant. Moreover, pan-genome analysis (Scoary) of isolate genome sequences only identified six genes of unknown function associated with increased cold tolerance while failing to identify any known cold tolerance genes. Overall, an association that was consistent in laboratory media and real food matrices was demonstrated between isolate CC type and increased cold tolerance.}, } @article {pmid36675897, year = {2023}, author = {Bigey, F and Pasteur, E and Połomska, X and Thomas, S and Crutz-Le Coq, AM and Devillers, H and Neuvéglise, C}, title = {Insights into the Genomic and Phenotypic Landscape of the Oleaginous Yeast Yarrowia lipolytica.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {9}, number = {1}, pages = {}, pmid = {36675897}, issn = {2309-608X}, support = {AIP-Bioressources 2011//National Research Institute for Agriculture, Food and Environment/ ; convention 2012 93 0805//DGAC/ ; Investment for the Future ("Investissements d'Avenir"), grant number ANR-001//SAS PIVERT/ ; }, abstract = {Although Yarrowia lipolytica is a model yeast for the study of lipid metabolism, its diversity is poorly known, as studies generally consider only a few standard laboratory strains. To extend our knowledge of this biotechnological workhorse, we investigated the genomic and phenotypic diversity of 56 natural isolates. Y. lipolytica is classified into five clades with no correlation between clade membership and geographic or ecological origin. A low genetic diversity (π = 0.0017) and a pan-genome (6528 genes) barely different from the core genome (6315 genes) suggest Y. lipolytica is a recently evolving species. Large segmental duplications were detected, totaling 892 genes. With three new LTR-retrotransposons of the Gypsy family (Tyl4, Tyl9, and Tyl10), the transposable element content of genomes appeared diversified but still low (from 0.36% to 3.62%). 
We quantified 34 traits with substantial phenotypic diversity, but genome-wide association studies failed to evidence any associations. Instead, we investigated known genes and found four mutational events leading to XPR2 protease inactivation. Regarding lipid metabolism, most high-impact mutations were found in family-belonging genes, such as ALK or LIP, and therefore had a low phenotypic impact, suggesting that the huge diversity of lipid synthesis and accumulation is multifactorial or due to complex regulations.}, } @article {pmid36671332, year = {2023}, author = {Fono-Tamo, EUK and Kamika, I and Dewar, JB and Lekota, KE}, title = {Comparative Genomics Revealed a Potential Threat of Aeromonas rivipollensis G87 Strain and Its Antibiotic Resistance.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {12}, number = {1}, pages = {}, pmid = {36671332}, issn = {2079-6382}, support = {TTK200306508304//National Research Foundation/ ; }, abstract = {Aeromonas rivipollensis is an emerging pathogen linked to a broad range of infections in humans. Due to the inability to accurately differentiate Aeromonas species using conventional techniques, in-depth comparative genomics analysis is imperative to identify them. This study characterized 4 A. rivipollensis strains that were isolated from river water in Johannesburg, South Africa, by whole-genome sequencing (WGS). WGS was carried out, and taxonomic classification was employed to profile virulence and antibiotic resistance (AR). The AR profiles of the A. rivipollensis genomes consisted of betalactams and cephalosporin-resistance genes, while the tetracycline-resistance gene (tetE) was only determined to be in the G87 strain. A mobile genetic element (MGE), transposons TnC, was determined to be in this strain that mediates tetracycline resistance MFS efflux tetE. 
A pangenomic investigation revealed the G87 strain's unique characteristic, which included immunoglobulin A-binding proteins, extracellular polysialic acid, and exogenous sialic acid as virulence factors. The identified polysialic acid and sialic acid genes can be associated with antiphagocytic and antibactericidal properties, respectively. MGEs such as transposases introduce virulence and AR genes in the A. rivipollensis G87 genome. This study showed that A. rivipollensis is generally resistant to a class of beta-lactams and cephalosporins. MGEs pose a challenge in some of the Aeromonas species strains and are subjected to antibiotics resistance and the acquisition of virulence genes in the ecosystem.}, } @article {pmid36671226, year = {2022}, author = {Thakur, Z and Vaid, RK and Anand, T and Tripathi, BN}, title = {Comparative Genome Analysis of 19 Trueperella pyogenes Strains Originating from Different Animal Species Reveal a Genetically Diverse Open Pan-Genome.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {12}, number = {1}, pages = {}, pmid = {36671226}, issn = {2079-6382}, support = {IXX11884//National Research Centre on Equines/ ; }, abstract = {Trueperella pyogenes is a Gram-positive opportunistic pathogen that causes severe cases of mastitis, metritis, and pneumonia in a wide range of animals, resulting in significant economic losses. Although little is known about the virulence factors involved in the disease pathogenesis, a comprehensive comparative genome analysis of T. pyogenes genomes has not been performed till date. Hence, present investigation was carried out to characterize and compare 19 T. pyogenes genomes originating in different geographical origins including the draft genome of the first Indian origin strain T. pyogenes Bu5. Additionally, candidate virulence determinants that could be crucial for their pathogenesis were also detected and analyzed by using various bioinformatics tools. 
The pan-genome calculations revealed an open pan-genome of T. pyogenes. In addition, an inventory of virulence related genes, 190 genomic islands, 31 prophage sequences, and 40 antibiotic resistance genes that could play a significant role in organism's pathogenicity were detected. The core-genome based phylogeny of T. pyogenes demonstrates a polyphyletic, host-associated group with a high degree of genomic diversity. The identified core-genome can be further used for screening of drug and vaccine targets. The investigation has provided unique insights into pan-genome, virulome, mobiliome, and resistome of T. pyogenes genomes and laid the foundation for future investigations.}, } @article {pmid36669850, year = {2023}, author = {Tonkin-Hill, G and Gladstone, RA and Pöntinen, AK and Arredondo-Alonso, S and Bentley, SD and Corander, J}, title = {Robust analysis of prokaryotic pangenome gene gain and loss rates with Panstripe.}, journal = {Genome research}, volume = {33}, number = {1}, pages = {129-140}, pmid = {36669850}, issn = {1549-5469}, support = {204016/Z/16/Z//Wellcome Trust/United Kingdom ; 206194//Wellcome Trust/United Kingdom ; }, mesh = {Humans ; Phylogeny ; *Evolution, Molecular ; *Prokaryotic Cells ; Genome, Bacterial ; Gene Transfer, Horizontal ; }, abstract = {Horizontal gene transfer (HGT) plays a critical role in the evolution and diversification of many microbial species. The resulting dynamics of gene gain and loss can have important implications for the development of antibiotic resistance and the design of vaccine and drug interventions. Methods for the analysis of gene presence/absence patterns typically do not account for errors introduced in the automated annotation and clustering of gene sequences. 
In particular, methods adapted from ecological studies, including the pangenome gene accumulation curve, can be misleading as they may reflect the underlying diversity in the temporal sampling of genomes rather than a difference in the dynamics of HGT. Here, we introduce Panstripe, a method based on generalized linear regression that is robust to population structure, sampling bias, and errors in the predicted presence/absence of genes. We show using simulations that Panstripe can effectively identify differences in the rate and number of genes involved in HGT events, and illustrate its capability by analyzing several diverse bacterial genome data sets representing major human pathogens.}, } @article {pmid36662619, year = {2023}, author = {Secomandi, S and Gallo, GR and Sozzoni, M and Iannucci, A and Galati, E and Abueg, L and Balacco, J and Caprioli, M and Chow, W and Ciofi, C and Collins, J and Fedrigo, O and Ferretti, L and Fungtammasan, A and Haase, B and Howe, K and Kwak, W and Lombardo, G and Masterson, P and Messina, G and Møller, AP and Mountcastle, J and Mousseau, TA and Ferrer Obiol, J and Olivieri, A and Rhie, A and Rubolini, D and Saclier, M and Stanyon, R and Stucki, D and Thibaud-Nissen, F and Torrance, J and Torroni, A and Weber, K and Ambrosini, R and Bonisoli-Alquati, A and Jarvis, ED and Gianfranceschi, L and Formenti, G}, title = {A chromosome-level reference genome and pangenome for barn swallow population genomics.}, journal = {Cell reports}, volume = {42}, number = {1}, pages = {111992}, pmid = {36662619}, issn = {2211-1247}, support = {/HHMI/Howard Hughes Medical Institute/United States ; }, mesh = {Animals ; *Swallows/genetics ; Metagenomics ; Genome/genetics ; Genomics ; Chromosomes ; }, abstract = {Insights into the evolution of non-model organisms are limited by the lack of reference genomes of high accuracy, completeness, and contiguity. 
Here, we present a chromosome-level, karyotype-validated reference genome and pangenome for the barn swallow (Hirundo rustica). We complement these resources with a reference-free multialignment of the reference genome with other bird genomes and with the most comprehensive catalog of genetic markers for the barn swallow. We identify potentially conserved and accelerated genes using the multialignment and estimate genome-wide linkage disequilibrium using the catalog. We use the pangenome to infer core and accessory genes and to detect variants using it as a reference. Overall, these resources will foster population genomics studies in the barn swallow, enable detection of candidate genes in comparative genomics studies, and help reduce bias toward a single reference genome.}, } @article {pmid36646895, year = {2023}, author = {Sibbesen, JA and Eizenga, JM and Novak, AM and Sirén, J and Chang, X and Garrison, E and Paten, B}, title = {Haplotype-aware pantranscriptome analyses using spliced pangenome graphs.}, journal = {Nature methods}, volume = {20}, number = {2}, pages = {239-247}, pmid = {36646895}, issn = {1548-7105}, support = {U01HG010961//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; R01HG010485//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U41HG010972//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; U24HG011853//U.S. Department of Health & Human Services | NIH | National Human Genome Research Institute (NHGRI)/ ; OT2 OD026682/OD/NIH HHS/United States ; }, mesh = {Haplotypes ; *Computational Biology ; *Gene Expression Profiling ; Metagenomics ; Transcriptome ; }, abstract = {Pangenomics is emerging as a powerful computational paradigm in bioinformatics. 
This field uses population-level genome reference structures, typically consisting of a sequence graph, to mitigate reference bias and facilitate analyses that were challenging with previous reference-based methods. In this work, we extend these methods into transcriptomics to analyze sequencing data using the pantranscriptome: a population-level transcriptomic reference. Our toolchain, which consists of additions to the VG toolkit and a standalone tool, RPVG, can construct spliced pangenome graphs, map RNA sequencing data to these graphs, and perform haplotype-aware expression quantification of transcripts in a pantranscriptome. We show that this workflow improves accuracy over state-of-the-art RNA sequencing mapping methods, and that it can efficiently quantify haplotype-specific transcript expression without needing to characterize the haplotypes of a sample beforehand.}, } @article {pmid36646262, year = {2023}, author = {Mishra, A and Kesarwani, S and Jaiswal, TP and Bhattacharjee, S and Chakraborty, S and Mishra, AK and Singh, SS}, title = {Decoding whole genome of Anoxybacillus rupiensis TPH1 isolated from tatapani hot spring, India and giving insight into bioremediation ability of TPH1 via heavy metals and azo dyes.}, journal = {Research in microbiology}, volume = {174}, number = {4}, pages = {104027}, doi = {10.1016/j.resmic.2023.104027}, pmid = {36646262}, issn = {1769-7123}, mesh = {*Anoxybacillus/genetics ; *Hot Springs ; Biodegradation, Environmental ; Azo Compounds/metabolism ; Molecular Docking Simulation ; *Metals, Heavy/metabolism ; Phylogeny ; }, abstract = {A moderately thermophilic, gram-positive genomospecies Anoxybacillus rupiensis TPH1 was isolated from Tatapani hot spring, Chhattisgarh, India. Genome of 3.70 Mb with 42.3% GC subsumed 4131 CDSs, 65 tRNA, 5 rRNA, 35 AMR and 19 drug target genes. Further, comparative genomics of 19 Anoxybacillus spp. 
exhibited an open pan genome of 13102 genes along with core (10.62%), unique (43.5%) and accessory (45.9%) genes. Moreover, phylogenomic tree displayed clustering of Anoxybacillus spp. into two distinct clades where clade A species harbored larger genomes, more unique genes, CDS and hypothetical proteins than clade B species. Further, distribution of azoreductases showed FMN-binding NADPH azoreductase (AzoRed1) presence in clade A species only and FMN-binding NADH azoreductase (AzoRed2) harboring by species of both clades. Heavy metal resistance genes distribution showed omnipresence of znuA, copZ and arsC in both clades, dispersed presence of cbiM, czcD, merA and feoB over both clades and harboring of nikA and acr3 by few species of clade A only. Additionally, molecular docking of AzoRed1, AzoRed2, ZnuA, CopZ, Acr3, CbiM, CzcD, MerA and NikA with their respective ligands indicated high affinity and stable binding. Conclusively, present study provided insight into gene repertoire of genus Anoxybacillus and a basis for the potential application of this thermophile in bioremediation of azo dyes and heavy metals.}, } @article {pmid36644533, year = {2022}, author = {Pang, M and Tu, T and Wang, Y and Zhang, P and Ren, M and Yao, X and Luo, Y and Yang, Z}, title = {Design of a multi-epitope vaccine against Haemophilus parasuis based on pan-genome and immunoinformatics approaches.}, journal = {Frontiers in veterinary science}, volume = {9}, number = {}, pages = {1053198}, pmid = {36644533}, issn = {2297-1769}, abstract = {BACKGROUND: Glässer's disease, caused by Haemophilus parasuis (HPS), is responsible for economic losses in the pig industry worldwide. However, the existing commercial vaccines offer poor protection and there are significant barriers to the development of effective vaccines.

METHODS: In the current study, we aimed to identify potential vaccine candidates and design a multi-epitope vaccine against HPS by performing pan-genomic analysis of 121 strains and using a reverse vaccinology approach.

RESULTS: The designed vaccine constructs consist of predicted epitopes of B and T cells derived from the outer membrane proteins of the HPS core genome. The vaccine was found to be highly immunogenic, non-toxic, and non-allergenic as well as have stable physicochemical properties. It has a high binding affinity to Toll-like receptor 2. In addition, in silico immune simulation results showed that the vaccine elicited an effective immune response. Moreover, the mouse polyclonal antibody obtained by immunizing the vaccine protein can be combined with different serotypes and non-typable Haemophilus parasuis in vitro.

CONCLUSION: The overall results of the study suggest that the designed multi-epitope vaccine is a promising candidate for pan-prophylaxis against different strains of HPS.}, } @article {pmid36638170, year = {2023}, author = {Cai, H and McLimans, CJ and Beyer, JE and Krumholz, LR and Hambright, KD}, title = {Microcystis pangenome reveals cryptic diversity within and across morphospecies.}, journal = {Science advances}, volume = {9}, number = {2}, pages = {eadd3783}, pmid = {36638170}, issn = {2375-2548}, mesh = {Humans ; *Microcystis/genetics ; Phylogeny ; Base Sequence ; Ecology ; }, abstract = {Microcystis, a common harmful algal bloom (HAB) taxon, threatens water supplies and human health, yet species delimitation is contentious in this taxon, leading to challenges in research and management of this threat. Historical and common morphology-based classifications recognize multiple morphospecies, most with variable and diverse ecologies, while DNA sequence-based classifications indicate a single species with multiple ecotypes. To better delimit Microcystis species, we conducted a pangenome analysis of 122 genomes. Core- and non-core gene phylogenetic analyses placed 113 genomes into 23 monophyletic clusters containing at least two genomes. Overall, genome-related indices revealed that Microcystis contains at least 16 putative genospecies. Fifteen genospecies included at least one Microcystis aeruginosa morphospecies, and 10 genospecies included two or more morphospecies. 
This classification system will enable consistent taxonomic identification of Microcystis and thereby aid in resolving some of the complexities and controversies that have long characterized eco-evolutionary research and management of this important HAB taxon.}, } @article {pmid36630500, year = {2023}, author = {Konno, N and Iwasaki, W}, title = {Machine learning enables prediction of metabolic system evolution in bacteria.}, journal = {Science advances}, volume = {9}, number = {2}, pages = {eadc9130}, pmid = {36630500}, issn = {2375-2548}, mesh = {Phylogeny ; *Bacteria/genetics ; *Evolution, Molecular ; Genomics ; Genome, Bacterial ; }, abstract = {Evolution prediction is a long-standing goal in evolutionary biology, with potential impacts on strategic pathogen control, genome engineering, and synthetic biology. While laboratory evolution studies have shown the predictability of short-term and sequence-level evolution, that of long-term and system-level evolution has not been systematically examined. Here, we show that the gene content evolution of metabolic systems is generally predictable by applying ancestral gene content reconstruction and machine learning techniques to ~3000 bacterial genomes. Our framework, Evodictor, successfully predicted gene gain and loss evolution at the branches of the reference phylogenetic tree, suggesting that evolutionary pressures and constraints on metabolic systems are universally shared. Investigation of pathway architectures and meta-analysis of metagenomic datasets confirmed that these evolutionary patterns have physiological and ecological bases as functional dependencies among metabolic reactions and bacterial habitat changes. 
Last, pan-genomic analysis of intraspecies gene content variations proved that even "ongoing" evolution in extant bacterial species is predictable in our framework.}, } @article {pmid36627554, year = {2023}, author = {Forgacova, N and Holesova, Z and Hekel, R and Sedlackova, T and Pos, Z and Krivosikova, L and Janega, P and Kuracinova, KM and Babal, P and Radvak, P and Radvanszky, J and Gazdarica, J and Budis, J and Szemes, T}, title = {Evaluation and limitations of different approaches among COVID-19 fatal cases using whole-exome sequencing data.}, journal = {BMC genomics}, volume = {24}, number = {1}, pages = {12}, pmid = {36627554}, issn = {1471-2164}, support = {PP-COVID-20-051//Pangenomics for personalized clinical management of infected persons based on identified viral genome and human exoma (Code ITMS:313011ATL7), co-financed by the European Regional Development Fund; co financed by the Slovak Research and Development Agency grant PP-COVID-20-051./ ; }, mesh = {Humans ; *COVID-19/genetics ; SARS-CoV-2 ; Exome Sequencing ; Alleles ; DNA ; }, abstract = {BACKGROUND: COVID-19 caused by the SARS-CoV-2 infection may result in various disease symptoms and severity, ranging from asymptomatic, through mildly symptomatic, up to very severe and even fatal cases. 
Although environmental, clinical, and social factors play important roles in both susceptibility to the SARS-CoV-2 infection and progress of COVID-19 disease, it is becoming evident that both pathogen and host genetic factors are important too. In this study, we report findings from whole-exome sequencing (WES) of 27 individuals who died due to COVID-19, especially focusing on frequencies of DNA variants in genes previously associated with the SARS-CoV-2 infection and the severity of COVID-19.

RESULTS: We selected the risk DNA variants/alleles or target genes using four different approaches: 1) aggregated GWAS results from the GWAS Catalog; 2) selected publications from PubMed; 3) the aggregated results of the Host Genetics Initiative database; and 4) a commercial DNA variant annotation/interpretation tool providing its own knowledgebase. We divided these variants/genes into those reported to influence the susceptibility to the SARS-CoV-2 infection and those influencing the severity of COVID-19. Based on the above, we compared the frequencies of alleles found in the fatal COVID-19 cases to the frequencies identified in two population control datasets (non-Finnish European population from the gnomAD database and genomic frequencies specific for the Slovak population from our own database). When compared to both control population datasets, our analyses indicated a trend of higher frequencies of severe COVID-19 associated risk alleles among fatal COVID-19 cases. This trend reached statistical significance specifically when using the HGI-derived variant list. We also analysed other approaches to WES data evaluation, demonstrating its utility as well as limitations.

CONCLUSIONS: Although our results proved the likely involvement of host genetic factors pointed out by previous studies looking into severity of COVID-19 disease, careful considerations of the molecular-testing strategies and the evaluated genomic positions may have a strong impact on the utility of genomic testing.}, } @article {pmid36627170, year = {2023}, author = {Nii, T and Maeda, Y and Motooka, D and Naito, M and Matsumoto, Y and Ogawa, T and Oguro-Igashira, E and Kishikawa, T and Yamashita, M and Koizumi, S and Kurakawa, T and Okumura, R and Kayama, H and Murakami, M and Sakaguchi, T and Das, B and Nakamura, S and Okada, Y and Kumanogoh, A and Takeda, K}, title = {Genomic repertoires linked with pathogenic potency of arthritogenic Prevotella copri isolated from the gut of patients with rheumatoid arthritis.}, journal = {Annals of the rheumatic diseases}, volume = {82}, number = {5}, pages = {621-629}, pmid = {36627170}, issn = {1468-2060}, mesh = {Animals ; Mice ; *Gastrointestinal Microbiome/genetics ; *Arthritis, Rheumatoid/genetics ; Prevotella/genetics ; Genomics ; Disease Models, Animal ; }, abstract = {OBJECTIVES: Prevotella copri is considered to be a contributing factor in rheumatoid arthritis (RA). However, in some non-Westernised countries, healthy individuals also harbour an abundance of P. copri in the intestine. This study investigated the pathogenicity of RA patient-derived P. copri (P. copri RA) compared with healthy control-derived P. copri (P. copri HC).

METHODS: We obtained 13 P. copri strains from the faeces of patients with RA and healthy controls. Following whole genome sequencing, the sequences of P. copri RA and P. copri HC were compared. To analyse the arthritis-inducing ability of P. copri, we examined two arthritis models: (1) a collagen-induced arthritis model harbouring P. copri under specific-pathogen-free conditions and (2) an SKG mouse arthritis model under P. copri-monocolonised conditions. Finally, to evaluate the ability of P. copri to activate innate immune cells, we performed in vitro stimulation of bone marrow-derived dendritic cells (BMDCs) by P. copri RA and P. copri HC.

RESULTS: Comparative genomic analysis revealed no apparent differences in the core gene contents between P. copri RA and P. copri HC, but pangenome analysis revealed the high genome plasticity of P. copri. We identified a P. copri RA-specific genomic region as a conjugative transposon. In both arthritis models, P. copri RA induced more severe arthritis than P. copri HC. In vitro BMDC stimulation experiments revealed the upregulation of IL-17 and Th17-related cytokines (IL-6, IL-23) by P. copri RA.

CONCLUSION: Our findings reveal the genetic diversity of P. copri, and the genomic signatures associated with strong arthritis-inducing ability of P. copri RA. Our study contributes towards elucidation of the complex pathogenesis of RA.}, } @article {pmid36623869, year = {2022}, author = {Ruggieri, AA and Livraghi, L and Lewis, JJ and Evans, E and Cicconardi, F and Hebberecht, L and Ortiz-Ruiz, Y and Montgomery, SH and Ghezzi, A and Rodriguez-Martinez, JA and Jiggins, CD and McMillan, WO and Counterman, BA and Papa, R and Van Belleghem, SM}, title = {Erratum: A butterfly pan-genome reveals that a large amount of structural variation underlies the evolution of chromatin accessibility.}, journal = {Genome research}, volume = {32}, number = {11-12}, pages = {2145}, doi = {10.1101/gr.277534.122}, pmid = {36623869}, issn = {1549-5469}, } @article {pmid36622155, year = {2023}, author = {Saak, CC and Pierce, EC and Dinh, CB and Portik, D and Hall, R and Ashby, M and Dutton, RJ}, title = {Longitudinal, Multi-Platform Metagenomics Yields a High-Quality Genomic Catalog and Guides an In Vitro Model for Cheese Communities.}, journal = {mSystems}, volume = {8}, number = {1}, pages = {e0070122}, pmid = {36622155}, issn = {2379-5077}, support = {DP2 AT010401/AT/NCCIH NIH HHS/United States ; }, mesh = {Humans ; *Cheese/microbiology ; Metagenomics ; Bacteria ; Metagenome/genetics ; *Microbiota/genetics ; }, abstract = {Microbiomes are intricately intertwined with human health, geochemical cycles, and food production. While many microbiomes of interest are highly complex and experimentally intractable, cheese rind microbiomes have proven to be powerful model systems for the study of microbial interactions. To provide a more comprehensive view of the genomic potential and temporal dynamics of cheese rind communities, we combined longitudinal, multi-platform metagenomics of three ripening washed-rind cheeses with whole-genome sequencing of community isolates. 
Sequencing-based approaches revealed a highly reproducible microbial succession in each cheese and the coexistence of closely related Psychrobacter species and enabled the prediction of plasmid and phage diversity and their host associations. In combination with culture-based approaches, we established a genomic catalog and a paired 16-member in vitro washed-rind cheese system. The combination of multi-platform metagenomic time-series data and an in vitro model provides a rich resource for further investigation of cheese rind microbiomes both computationally and experimentally. IMPORTANCE: Metagenome sequencing can provide great insights into microbiome composition and function and help researchers develop testable hypotheses. Model microbiomes, such as those composed of cheese rind bacteria and fungi, allow the testing of these hypotheses in a controlled manner. Here, we first generated an extensive longitudinal metagenomic data set. This data set reveals successional dynamics, yields a phyla-spanning bacterial genomic catalog, associates mobile genetic elements with their hosts, and provides insights into functional enrichment of Psychrobacter in the cheese environment. Next, we show that members of the washed-rind cheese microbiome lend themselves to in vitro community reconstruction. 
This paired metagenomic data and in vitro system can thus be used as a platform for generating and testing hypotheses related to the dynamics within, and the functions associated with, cheese rind microbiomes.}, } @article {pmid36621865, year = {2023}, author = {Zhang, Z and Li, K and Zhang, H and Wang, Q and Zhao, L and Liu, J and Chen, H}, title = {A single silk- and multiple pollen-expressed PMEs at the Ga1 locus modulate maize unilateral cross-incompatibility.}, journal = {Journal of integrative plant biology}, volume = {65}, number = {5}, pages = {1344-1355}, doi = {10.1111/jipb.13445}, pmid = {36621865}, issn = {1744-7909}, mesh = {Germ Cells, Plant ; Plant Breeding ; Pollen/genetics ; *Zea mays/genetics/metabolism ; }, abstract = {The Gametophyte factor1 (Ga1) locus in maize confers unilateral cross-incompatibility (UCI), and it is controlled by both pollen and silk-specific determinants. Although the Ga1 locus has been reported for more than a century and is widely utilized in maize breeding programs, only the pollen-specific ZmGa1P has been shown to function as a male determinant; thus, the genomic structure of the Ga1 locus and all the determinants that control UCI at this locus have not yet been fully characterized. Here, we used map-based cloning to confirm the determinants of UCI at the Ga1 locus and maize pan-genome sequence data to characterize the genomic structure of the Ga1 locus. The Ga1 locus comprises one silk-expressed pectin methylesterase gene (PME) (ZmGa1F) and eight pollen-expressed PMEs (ZmGa1P and ZmGa1PL1-7). Knockout of ZmGa1F in Ga1/Ga1 lines leads to the complete loss of the female barrier function. The expression of individual ZmGa1PL genes in a ga1/ga1 background endows ga1 pollen with the ability to overcome the female barrier of the Ga1 locus. 
These findings, combined with genomic data and genetic analyses, indicate that the Ga1 locus is modulated by a single female determinant and multiple male determinants, which are tightly linked. The results of this study provide valuable insights into the genomic structure of the Ga2 and Tcb1 loci and will aid applications of these loci in maize breeding programs.}, } @article {pmid36619820, year = {2023}, author = {Khushboo, and Singhvi, N and Gupta, V and Dhaka, N and Dubey, KK}, title = {Draft genome sequence of Streptomyces sp. KD18, isolated from industrial soil.}, journal = {3 Biotech}, volume = {13}, number = {1}, pages = {34}, pmid = {36619820}, issn = {2190-572X}, abstract = {UNLABELLED: The present study scrutinizes the presence of Streptomyces strains in the soil sample collected from industrial area of Bahadurgarh (Haryana) India. The morphological approach manifested the isolated strain belong to Streptomyces species and named as Streptomyces sp. KD18. Sequencing of Streptomyces sp. KD18 genome was performed by Illumina Nextseq500 platform. 65 contigs were generated via SPAdes v3.11.1 and harboured genome size of 7.2 Mb. AntiSMASH server revealed the presence of 25 biosynthetic gene clusters in KD18 genome where BGC of lipstatin was of more interest from industrial and pharmaceutical purpose. The draft genome sequence represented via ANI values claimed that the KD18 strain belongs to Streptomyces toxytricini and finally named as S. toxytricini KD18. The LC-MS analysis of the extracted metabolite confirmed the production of lipstatin. The genome sequence data have been deposited to NCBI under the accession number of GCA_014748315.1.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s13205-022-03453-3.}, } @article {pmid36618639, year = {2022}, author = {Parakkunnel, R and Naik K, B and Vanishree, G and C, S and Purru, S and Bhaskar K, U and Bhat, KV and Kumar, S}, title = {Gene fusions, micro-exons and splice variants define stress signaling by AP2/ERF and WRKY transcription factors in the sesame pan-genome.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1076229}, pmid = {36618639}, issn = {1664-462X}, abstract = {Evolutionary dynamics of AP2/ERF and WRKY genes, the major components of defense response were studied extensively in the sesame pan-genome. Massive variation was observed for gene copy numbers, genome location, domain structure, exon-intron structure and protein parameters. In the pan-genome, 63% of AP2/ERF members were devoid of introns whereas >99% of WRKY genes contained multiple introns. AP2 subfamily was found to be micro-exon rich with the adjoining intronic sequences sharing sequence similarity to many stress-responsive and fatty acid metabolism genes. WRKY family included extensive multi-domain gene fusions where the additional domains significantly enhanced gene and exonic sizes as well as gene copy numbers. The fusion genes were found to have roles in acquired immunity, stress response, cell and membrane integrity as well as ROS signaling. The individual genomes shared extensive synteny and collinearity although ecological adaptation was evident among the Chinese and Indian accessions. Significant positive selection effects were noticed for both micro-exon and multi-domain genes. Splice variants with changes in acceptor, donor and branch sites were common and 6-7 splice variants were detected per gene. The study ascertained vital roles of lipid metabolism and chlorophyll biosynthesis in the defense response and stress signaling pathways. 
60% of the studied genes localized in the nucleus while 20% preferred chloroplast. Unique cis-element distribution was noticed in the upstream promoter region with MYB and STRE in WRKY genes while MYC was present in the AP2/ERF genes. Intron-less genes exhibited great diversity in the promoter sequences wherein the predominance of dosage effect indicated variable gene expression levels. Mimicking the NBS-LRR genes, a chloroplast localized WRKY gene, Swetha_24868, with additional domains of chorismate mutase, cAMP and voltage-dependent potassium channel was found to act as a master regulator of defense signaling, triggering immunity and reducing ROS levels.}, } @article {pmid36614303, year = {2023}, author = {Schanknecht, E and Bachari, A and Nassar, N and Piva, T and Mantri, N}, title = {Phytochemical Constituents and Derivatives of Cannabis sativa; Bridging the Gap in Melanoma Treatment.}, journal = {International journal of molecular sciences}, volume = {24}, number = {1}, pages = {}, pmid = {36614303}, issn = {1422-0067}, support = {Not applicable//RMIT University/ ; }, mesh = {Humans ; *Cannabis/chemistry ; *Cannabinoids/pharmacology/therapeutic use/chemistry ; Terpenes/pharmacology ; *Melanoma/drug therapy ; Phytochemicals/pharmacology/therapeutic use ; }, abstract = {Melanoma is deadly, physically impairing, and has ongoing treatment deficiencies. Current treatment regimens include surgery, targeted kinase inhibitors, immunotherapy, and combined approaches. Each of these treatments face pitfalls, with diminutive five-year survival in patients with advanced metastatic invasion of lymph and secondary organ tissues. Polyphenolic compounds, including cannabinoids, terpenoids, and flavonoids; both natural and synthetic, have emerging evidence of nutraceutical, cosmetic and pharmacological potential, including specific anti-cancer, anti-inflammatory, and palliative utility. 
Cannabis sativa is a wellspring of medicinal compounds whose direct and adjunctive application may offer considerable relief for melanoma sufferers worldwide. This review aims to address the diverse applications of C. sativa's biocompounds in the scope of melanoma and suggest it as a strong candidate for ongoing pharmacological evaluation.}, } @article {pmid36608657, year = {2023}, author = {Hackl, T and Laurenceau, R and Ankenbrand, MJ and Bliem, C and Cariani, Z and Thomas, E and Dooley, KD and Arellano, AA and Hogle, SL and Berube, P and Leventhal, GE and Luo, E and Eppley, JM and Zayed, AA and Beaulaurier, J and Stepanauskas, R and Sullivan, MB and DeLong, EF and Biller, SJ and Chisholm, SW}, title = {Novel integrative elements and genomic plasticity in ocean ecosystems.}, journal = {Cell}, volume = {186}, number = {1}, pages = {47-62.e16}, doi = {10.1016/j.cell.2022.12.006}, pmid = {36608657}, issn = {1097-4172}, mesh = {*Ecosystem ; *Genome, Bacterial/genetics ; Phylogeny ; Oceans and Seas ; Genomics ; }, abstract = {Horizontal gene transfer accelerates microbial evolution. The marine picocyanobacterium Prochlorococcus exhibits high genomic plasticity, yet the underlying mechanisms are elusive. Here, we report a novel family of DNA transposons-"tycheposons"-some of which are viral satellites while others carry cargo, such as nutrient-acquisition genes, which shape the genetic variability in this globally abundant genus. Tycheposons share distinctive mobile-lifecycle-linked hallmark genes, including a deep-branching site-specific tyrosine recombinase. Their excision and integration at tRNA genes appear to drive the remodeling of genomic islands-key reservoirs for flexible genes in bacteria. In a selection experiment, tycheposons harboring a nitrate assimilation cassette were dynamically gained and lost, thereby promoting chromosomal rearrangements and host adaptation. 
Vesicles and phage particles harvested from seawater are enriched in tycheposons, providing a means for their dispersal in the wild. Similar elements are found in microbes co-occurring with Prochlorococcus, suggesting a common mechanism for microbial diversification in the vast oligotrophic oceans.}, } @article {pmid36607068, year = {2023}, author = {Wong, ED and Miyasato, SR and Aleksander, S and Karra, K and Nash, RS and Skrzypek, MS and Weng, S and Engel, SR and Cherry, JM}, title = {Saccharomyces genome database update: server architecture, pan-genome nomenclature, and external resources.}, journal = {Genetics}, volume = {224}, number = {1}, pages = {}, pmid = {36607068}, issn = {1943-2631}, support = {U41 HG002273/HG/NHGRI NIH HHS/United States ; U24 HG001315/HG/NHGRI NIH HHS/United States ; U24 HG010859/HG/NHGRI NIH HHS/United States ; U24 HG012212/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Saccharomyces/genetics ; Saccharomyces cerevisiae/genetics ; Genome, Fungal ; Databases, Genetic ; Software ; }, abstract = {As one of the first model organism knowledgebases, Saccharomyces Genome Database (SGD) has been supporting the scientific research community since 1993. As technologies and research evolve, so does SGD: from updates in software architecture, to curation of novel data types, to incorporation of data from, and collaboration with, other knowledgebases. We are continuing to make steps toward providing the community with an S. cerevisiae pan-genome. Here, we describe software upgrades, a new nomenclature system for genes not found in the reference strain, and additions to gene pages. 
With these improvements, we aim to remain a leading resource for students, researchers, and the broader scientific community.}, } @article {pmid36605514, year = {2022}, author = {Dong, C and Wei, L and Wang, J and Lai, Q and Huang, Z and Shao, Z}, title = {Genome-based taxonomic rearrangement of Oceanobacter-related bacteria including the description of Thalassolituus hydrocarbonoclasticus sp. nov. and Thalassolituus pacificus sp. nov. and emended description of the genus Thalassolituus.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1051202}, pmid = {36605514}, issn = {1664-302X}, abstract = {Oceanobacter-related bacteria (ORB) are a group of oligotrophic marine bacteria play an underappreciated role in carbon cycling. They have been frequently described as one of the dominant bacterial groups with a wide distribution in coastal and deep seawater of global oceans. To clarify their taxonomic affiliation in relation to alkane utilization, phylogenomic and comparative genomics analyses were performed based on currently available genomes from GenBank and four newly isolated strains, in addition to phenotypic and chemotaxonomic characteristics. Consistently, phylogenomic analysis robustly separated them into two groups, which are accordingly hydrocarbon-degrading (HD, Thalassolituus and Oleibacter) and non-HD (NHD, Oceanobacter). In addition, the two groups can also be readily distinguished by several polyphasic taxonomic characteristics. Furthermore, both AAI and POCP genomic indices within the HD group support the conclusion that the members of the genus Oleibacter should be transferred into the genus Thalassolituus. Moreover, HD and NHD bacteria differed significantly in terms of genome size, G + C content and genes involved in alkane utilization. 
All HD bacteria contain the key gene alkB encoding an alkane monooxygenase, which can be used as a marker gene to distinguish the members of closely related genera Oceanobacter and Thalassolituus. Pangenome analysis revealed that the larger accessory genome may endow Thalassolituus with the flexibility to cope with the dynamics of marine environments and thrive therein, although they possess smaller pan, core- and unique-genomes than Oceanobacter. Within the HD group, twelve species were clearly distinguished from each other by both dDDH and ANI genomic indices, including two novel species represented by the newly isolated strains alknpb1M-1 [T] and 59MF3M-4 [T] , for which the names Thalassolituus hydrocarbonoclasticus sp. nov. and Thalassolituus pacificus sp. nov. are proposed. Collectively, these findings build a phylogenetic framework for the ORB and contribute to understanding of their role in marine carbon cycling.}, } @article {pmid36605106, year = {2022}, author = {Ali, A and Khatoon, A and Mirza, T and Ahmad, F}, title = {Intensification in Genetic Information and Acquisition of Resistant Genes in Genome of Acinetobacter baumannii: A Pan-Genomic Analysis.}, journal = {BioMed research international}, volume = {2022}, number = {}, pages = {3186343}, pmid = {36605106}, issn = {2314-6141}, mesh = {Humans ; *Acinetobacter baumannii/genetics ; Genomics ; Genome, Bacterial/genetics ; Anti-Bacterial Agents/pharmacology ; Computational Biology ; Drug Resistance, Multiple, Bacterial/genetics ; Microbial Sensitivity Tests ; }, abstract = {Acinetobacter baumannii (A. baumannii) attributes 26% of the mortality rate in hospitalized patients, and the percentage can rise to 46 in patients admitted to ICU as it is a major cause of ventilator-associated pneumonia. It has been nominated as the critical priority organism by WHO for which new therapeutic drugs are urgently required. 
To understand the genomic identification of different strains, antimicrobial resistance patterns, and epidemiological typing of organisms, whole-genome sequencing (WGS) analysis provides insight to explore new epitopes to develop new drugs against the organism. Therefore, the study is aimed at investigating the whole genome sequence of A. baumannii strains to report the new intensifications in its genomic profile. The genome sequences were retrieved from the NCBI database system. Pan-genome BPGA (Bacterial Pan-genome Analysis Tool) was used to analyze the core, pan, and species-specific genome analysis. The pan and core genome curves were extrapolated using the empirical power law equation $f(x) = a \cdot x^{b}$ and the exponential equation $f_1(x) = c \cdot e^{d \cdot x}$. To identify the resistant genes with resistant mutations against antibiotics, ResFinder and Galaxy Community hub bioinformatics tools were used. According to pan-genome analysis, there were 2227 core genes present in each species of the A. baumannii genome. Furthermore, the number of accessory genes ranged from 1182 to 1460, and the unique genes in the genome were 931. There were 325 exclusively absent genes in the genome of Acinetobacter baumannii. The pan-genome analysis showed that there is a 5-fold increase in the genome of A. baumannii in 5 years, and the genome is still open. 
There is the addition of multiple unique genes; among them, genes participating in the function of information and processing are increased.}, } @article {pmid36598708, year = {2023}, author = {Karthik, K and Anbazhagan, S and Chitra, MA and Sridhar, R}, title = {Comparative phylogenomics of Trueperella pyogenes reveals host-based distinction of strains.}, journal = {Antonie van Leeuwenhoek}, volume = {116}, number = {4}, pages = {343-351}, pmid = {36598708}, issn = {1572-9699}, mesh = {Cattle ; Animals ; Swine ; Phylogeny ; Multilocus Sequence Typing ; *Genomics ; }, abstract = {Trueperella pyogenes, an opportunistic pathogen causes various ailments in different animals. Different strains from different animals have distinct characters phenotypically and genotypically. Hence understanding the strains in a particular geographical location helps in framing the preventive measures. Comparative genomics of all the available T. pyogenes genome in the NCBI was conducted to understand the relatedness among strains. Whole genome phylogeny showed host associated clustering of strains recovered from swine lungs. Core genome phylogeny also showed host associated clustering mimicking whole genome phylogeny results. MLST analysis showed that there was higher diversity among cattle strains. Multidimensional scaling revealed five swine clusters, two cattle and buffalo clusters. Pangenome analysis also showed that T. pyogenes had an open genome with 57.09% accessory genome. Host specific genes were identified by pangenome analysis, and (R)-citramalate synthase was specific for swine strains of Asian origin. Host specific genes identified by pangenome analysis can be exploited for developing a molecular assay to specifically identify the strains. The study shows that MLST having higher discriminatory power can be used as an epidemiological tool for strain discrimination of T. 
pyogenes.}, } @article {pmid36598279, year = {2023}, author = {Xu, C and Rao, J and Xie, Y and Lu, J and Li, Z and Dong, C and Wang, L and Jiang, J and Chen, C and Chen, S}, title = {The DNA Phosphorothioation Restriction-Modification System Influences the Antimicrobial Resistance of Pathogenic Bacteria.}, journal = {Microbiology spectrum}, volume = {11}, number = {1}, pages = {e0350922}, pmid = {36598279}, issn = {2165-0497}, mesh = {*Anti-Bacterial Agents/pharmacology ; *Drug Resistance, Bacterial/genetics ; Bacteria/genetics ; DNA Restriction-Modification Enzymes/genetics ; DNA ; Gene Transfer, Horizontal ; }, abstract = {Bacterial defense barriers, such as DNA methylation-associated restriction-modification (R-M) and the CRISPR-Cas system, play an important role in bacterial antimicrobial resistance (AMR). Recently, a novel R-M system based on DNA phosphorothioate (PT) modification has been shown to be widespread in the kingdom of Bacteria as well as Archaea. However, the potential role of the PT R-M system in bacterial AMR remains unclear. In this study, we explored the role of PT R-Ms in AMR with a series of common clinical pathogenic bacteria. By analyzing the distribution of AMR genes related to mobile genetic elements (MGEs), it was shown that the presence of PT R-M effectively reduced the distribution of horizontal gene transfer (HGT)-derived AMR genes in the genome, even in the bacteria that did not tend to acquire AMR genes by HGT. In addition, unique gene variation analysis based on pangenome analysis and MGE prediction revealed that the presence of PT R-M could suppress HGT frequency. Thus, this is the first report showing that the PT R-M system has the potential to repress HGT-derived AMR gene acquisition by reducing the HGT frequency. IMPORTANCE In this study, we demonstrated the effect of DNA PT modification-based R-M systems on horizontal gene transfer of AMR genes in pathogenic bacteria. 
We show that there is no apparent association between the genetic background of the strains harboring PT R-Ms and the number of AMR genes or the kinds of gene families. The strains equipped with PT R-M harbor fewer plasmid-derived, prophage-derived, or integrating mobile genetic element (iMGE)-related AMR genes and have a lower HGT frequency, but the degree of inhibition varies among different bacteria. In addition, compared with Salmonella enterica and Escherichia coli, Klebsiella pneumoniae prefers to acquire MGE-derived AMR genes, and there is no coevolution between PT R-M clusters and bacterial core genes.}, } @article {pmid36589110, year = {2022}, author = {Liang, L and Zhang, J and Xiao, J and Li, X and Xie, Y and Tan, H and Song, X and Zhu, L and Xue, X and Xu, L and Zhou, P and Ran, J and Sun, B and Huang, Z and Tang, Y and Lin, L and Sun, G and Lai, Y and Li, H}, title = {Genome and pan-genome assembly of asparagus bean (Vigna unguiculata ssp. sesquipedialis) reveal the genetic basis of cold adaptation.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1059804}, pmid = {36589110}, issn = {1664-462X}, abstract = {Asparagus bean (Vigna unguiculata ssp. sesquipedialis) is an important cowpea subspecies. We assembled the genomes of Ningjiang 3 (NJ, 550.31 Mb) and Dubai bean (DB, 564.12 Mb) for comparative genomics analysis. The whole-genome duplication events of DB and NJ occurred at 64.55 and 64.81 Mya, respectively, while the divergence between soybean and Vigna occurred in the Paleogene period. NJ genes underwent positive selection and amplification in response to temperature and abiotic stress. In species-specific gene families, NJ is mainly enriched in response to abiotic stress, while DB is primarily enriched in respiration and photosynthesis. 
We established the pan-genomes of four accessions (NJ, DB, IT97K-499-35 and Xiabao II) and identified 20,336 (70.5%) core genes present in all the accessions, 6,507 (22.56%) variable genes in two individuals, and 2,004 (6.95%) unique genes. The final pan genome is 616.35 Mb, and the core genome is 399.78 Mb. The variable genes are manifested mainly in stress response functions, ABC transporters, seed storage, and dormancy control. In the pan-genome sequence variation analysis, genes affected by presence/absence variants were enriched in biological processes associated with defense responses, immune system processes, signal transduction, and agronomic traits. The results of the present study provide genetic data that could facilitate efficient asparagus bean genetic improvement, especially in producing cold-adapted asparagus bean.}, } @article {pmid36586056, year = {2023}, author = {Tanwar, UK and Stolarska, E and Rudy, E and Paluch-Lubawa, E and Grabsztunowicz, M and Arasimowicz-Jelonek, M and Sobieszczuk-Nowicka, E}, title = {Metal tolerance gene family in barley: an in silico comprehensive analysis.}, journal = {Journal of applied genetics}, volume = {64}, number = {2}, pages = {197-215}, pmid = {36586056}, issn = {2190-3883}, mesh = {*Hordeum/genetics ; Phylogeny ; Amino Acid Sequence ; Plant Proteins/genetics ; Stress, Physiological/genetics ; }, abstract = {Metal-tolerance proteins (MTPs) are divalent cation transporters that play critical roles in metal tolerance and ion homeostasis in plants. However, a comprehensive study of MTPs is still lacking in crop plants. The current study aimed to comprehensively identify and characterize the MTP gene family in barley (Hordeum vulgare, Hv), an important crop. In total, 12 HvMTPs were identified in the barley genome in this study. 
They were divided into three phylogenetic groups (Zn-cation diffusion facilitator proteins [CDFs], Fe/Zn-CDFs, and Mn-CDFs) and further subdivided into seven groups (G1, G5, G6, G7, G8, G9, and G12). The majority of MTPs were hydrophobic proteins found in the vacuolar membrane. Gene duplication analysis of HvMTPs revealed one pair of segmental-like duplications in the barley genome. Evolutionary analysis suggested that barley MTPs underwent purifying natural selection. Additionally, the HvMTPs were analyzed in the pan-genome sequences of barley (20 accessions), which suggests that HvMTPs are highly conserved in barley evolution. Cis-acting regulatory elements, microRNA target sites, and protein-protein interaction analysis indicated the role of HvMTPs in a variety of biological processes. Expression profiling suggests that HvMTPs play an active role in maintaining barley nutrient homeostasis throughout its life cycle, and their expression levels were not significantly altered by abiotic stresses like cold, drought, or heat. The expression of barley HvMTP genes in the presence of heavy metals such as Zn[2+], Cu[2+], As[3+], and Cd[2+] revealed that these MTPs were induced by at least one metal ion, implying their involvement in metal tolerance or transportation. 
The identification and comprehensive investigation of MTP gene family members will provide important gene resources for the genetic improvement of crops for metal tolerance, bioremediation, or biofortification of staple crops.}, } @article {pmid36585993, year = {2023}, author = {Bordel, S and Martín-González, D and Muñoz, R and Santos-Beneit, F}, title = {Genome sequence analysis and characterization of Bacillus altitudinis B12, a polylactic acid- and keratin-degrading bacterium.}, journal = {Molecular genetics and genomics : MGG}, volume = {298}, number = {2}, pages = {389-398}, pmid = {36585993}, issn = {1617-4623}, support = {067/229111//FEDER (TCUE 2021-2023)/ ; }, mesh = {Animals ; *Keratins/genetics/metabolism ; *Bacteria ; Polyesters/metabolism ; Sequence Analysis ; }, abstract = {Keratin-rich wastes, mainly in the form of feathers, are recalcitrant residues generated in high amounts as by-products in chicken farms and food industry. Polylactic acid (PLA) is the second most common biodegradable polymer found in commercial plastics, which is not easily degraded by microbial activity. This work reports the 3.8-Mb genome of Bacillus altitudinis B12, a highly efficient PLA- and keratin-degrading bacterium, with potential for environmental friendly biotechnological applications in the feed, fertilizer, detergent, leather, and pharmaceutical industries. The whole genome sequence of B. altitudinis B12 revealed that this strain (which had been previously misclassified as Bacillus pumilus B12) is closely related to the B. altitudinis strains ER5, W3, and GR-8. A total of 4056 coding sequences were annotated using the RAST server, of which 2484 are core genes of the pan genome of B. altitudinis and 171 are unique to this strain. According to the sequence analysis, B. altitudinis B12 has a predicted secretome of 353 proteins, among which a keratinase and a PLA depolymerase were identified by sequence analysis. 
The presence of these two enzymes could explain the characterized PLA and keratin biodegradation capability of the strain.}, } @article {pmid36579850, year = {2023}, author = {Javkar, K and Rand, H and Strain, E and Pop, M}, title = {PRAWNS: compact pan-genomic features for whole-genome population genomics.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {1}, pages = {}, pmid = {36579850}, issn = {1367-4811}, support = {R01 AI100947/AI/NIAID NIH HHS/United States ; //Center for Food Safety and Applied Nutrition/ ; }, mesh = {*Metagenomics ; *Software ; Genomics ; Genome ; Bacteria ; }, abstract = {MOTIVATION: Scientists seeking to understand the genomic basis of bacterial phenotypes, such as antibiotic resistance, today have access to an unprecedented number of complete and nearly complete genomes. Making sense of these data requires computational tools able to perform multiple-genome comparisons efficiently, yet currently available tools cannot scale beyond several tens of genomes.

RESULTS: We describe PRAWNS, an efficient and scalable tool for multiple-genome analysis. PRAWNS defines a concise set of genomic features (metablocks), as well as pairwise relationships between them, which can be used as a basis for large-scale genotype-phenotype association studies. We demonstrate the effectiveness of PRAWNS by identifying genomic regions associated with antibiotic resistance in Acinetobacter baumannii.

PRAWNS is implemented in C++ and Python3, licensed under the GPLv3 license, and freely downloadable from GitHub (https://github.com/KiranJavkar/PRAWNS.git).

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid36577205, year = {2023}, author = {Kadiri, M and Sevugapperumal, N and Nallusamy, S and Ragunathan, J and Ganesan, MV and Alfarraj, S and Ansari, MJ and Sayyed, RZ and Lim, HR and Show, PL}, title = {Pan-genome analysis and molecular docking unveil the biocontrol potential of Bacillus velezensis VB7 against Phytophthora infestans.}, journal = {Microbiological research}, volume = {268}, number = {}, pages = {127277}, doi = {10.1016/j.micres.2022.127277}, pmid = {36577205}, issn = {1618-0623}, mesh = {*Phytophthora infestans ; Molecular Docking Simulation ; *Solanum tuberosum ; Base Sequence ; Plant Diseases/prevention & control ; }, abstract = {Management of late blight of potato incited by Phytophthora infestans remains a major challenge. Coevolution of pathogen with resistant strains and the rise of fungicide resistance have made it more challenging to prevent the spread of P. infestans. Here, the anti-oomycete potential of Bacillus velezensis VB7 against P. infestans through pan-genome analysis and molecular docking were explored. The Biocontrol potential of VB7 against P. infestans was assessed using a confrontational assay. The biomolecules from the inhibition zone were identified and subjected to in silico analysis against P. infestans target proteins. Nucleotide sequences for 54 B. velezensis strains from different geographical locations were used for pan-genome analysis. The confrontational assay revealed the anti-oomycetes potential of VB7 against P. infestans. Molecular docking confirmed that the penicillamine disulfide had the maximum binding energy with eight effector proteins of P. infestans. Besides, scanning electron microscopic observations of P. infestans interaction with VB7 revealed structural changes in hypha and sporangia. Pan-genome analysis between 54 strains of B. 
velezensis confirmed that the core genome had 2226 genes, and it has an open pan-genome. The present study confirmed the anti-oomycete potential of B. velezensis VB7 against P. infestans and paved the way to explore the genetic potential of VB7.}, } @article {pmid36575347, year = {2023}, author = {Srivastava, S and Bombaywala, S and Jakhesara, SJ and Patil, NV and Joshi, CG and Purohit, HJ and Dafale, NA}, title = {Potential of camel rumen derived Bacillus subtilis and Bacillus velezensis strains for application in plant biomass hydrolysis.}, journal = {Molecular genetics and genomics : MGG}, volume = {298}, number = {2}, pages = {361-374}, pmid = {36575347}, issn = {1617-4623}, mesh = {Animals ; Bacillus subtilis/genetics ; Camelus ; Hydrolysis ; Rumen ; Biomass ; *Cellulase/metabolism ; *Bacillus/genetics ; }, abstract = {Rumen inhabiting Bacillus species possesses a high genetic potential for plant biomass hydrolysis and conversion to value-added products. In view of the same, five camel rumen-derived Bacillus strains, namely B. subtilis CRN 1, B. velezensis CRN 2, B. subtilis CRN 7, B. subtilis CRN 11, and B. velezensis CRN 23 were initially assayed for diverse hydrolytic activities, followed by genome mining to unravel the potential applications. CRN 1 and CRN 7 showed the highest endoglucanase activity with 0.4 U/ml, while CRN 23 showed high β-xylosidase activity of 0.36 U/ml. The comprehensive genomic insights of strains resolve taxonomic identity, clusters of an orthologous gene, pan-genome dynamics, and metabolic features. Annotation of Carbohydrate active enzymes (CAZymes) reveals the presence of diverse glycoside hydrolases (GH) GH1, GH5, GH43, and GH30, which are solely responsible for the effective breakdown of complex bonds in plant polysaccharides. Further, protein modeling and ligand docking of annotated endoglucanases showed an affinity for cellotrioside, cellobioside, and β-glucoside. 
The finding indicates the flexibility of Bacillus-derived endoglucanase activity on diverse cellulosic substrates. The presence of the butyrate synthesis gene in the CRN 1 strain depicts its key role in the production of important short-chain fatty acids essential for healthy rumen development. Similarly, antimicrobial peptides such as bacilysin and non-ribosomal peptides (NRPS) synthesized by the Bacillus strains were also annotated in the genome. The findings clearly define the role of Bacillus sp. inside the camel rumen and its potential application in various plant biomass utilizing industry and animal health research sectors.}, } @article {pmid36567375, year = {2023}, author = {Filipić, B and Malešević, M and Vasiljević, Z and Novović, K and Kojić, M and Jovčić, B}, title = {Comparative genomics of trimethoprim-sulfamethoxazole-resistant Achromobacter xylosoxidans clinical isolates from Serbia reveals shortened variant of class 1 integron integrase gene.}, journal = {Folia microbiologica}, volume = {68}, number = {3}, pages = {431-440}, pmid = {36567375}, issn = {1874-9356}, support = {451-03-68/2022-14/200161//Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja/ ; 451-03-68/2022-14/200042//Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja/ ; 451-03-68/2022-14/200178//Ministarstvo Prosvete, Nauke i Tehnološkog Razvoja/ ; }, mesh = {Humans ; Child ; Trimethoprim, Sulfamethoxazole Drug Combination ; *Achromobacter denitrificans/genetics ; Anti-Bacterial Agents/therapeutic use ; Integrases/therapeutic use ; Integrons/genetics ; Serbia ; *Achromobacter ; *Cystic Fibrosis ; Genomics ; *Gram-Negative Bacterial Infections ; Microbial Sensitivity Tests ; }, abstract = {Trimethoprim-sulfamethoxazole (SXT) is the preferable treatment option of the infections caused by Achromobacter spp. Our study aimed to analyze the SXT resistance of 98 Achromobacter spp. isolates from pediatric patients, among which 33 isolates were SXT-resistant. 
The presence of intI1 was screened by PCR and genome sequence analyses. The intI1 gene was detected in 10 of SXT-resistant isolates that had shorter intI1 PCR fragments named intI1S. Structural changes in intI1S were confirmed by genome sequencing and analyses which revealed 86 amino acids deletion in IntI1S protein compared to canonical IntI1 protein. All IntI1S isolates were of non-CF origin. Pan-genome analysis of intI1S bearing A. xylosoxidans isolates comprised 9052 genes, with the core genome consisting of 5455 protein-coding genes. Results in this study indicate that IntI1S isolates were derived from clinical settings and that cystic fibrosis (CF) patients were potential reservoirs for healthcare-associated infections that occurred in non-CF patients.}, } @article {pmid36566389, year = {2023}, author = {Shirasawa, K and Hosokawa, M and Yasui, Y and Toyoda, A and Isobe, S}, title = {Chromosome-scale genome assembly of a Japanese chili pepper landrace, Capsicum annuum 'Takanotsume'.}, journal = {DNA research : an international journal for rapid publication of reports on genes and genomes}, volume = {30}, number = {1}, pages = {}, pmid = {36566389}, issn = {1756-1663}, support = {16H02535//KAKENHI/ ; //Kazusa DNA Research Institute Foundation/ ; }, mesh = {*Capsicum/genetics ; Chromosome Mapping ; Chromosomes ; Plant Breeding ; }, abstract = {Here, we report the genome sequence of a popular Japanese chili pepper landrace, Capsicum annuum 'Takanotsume'. We used long-read sequencing and optical mapping, together with the genetic mapping technique, to obtain the chromosome-scale genome assembly of 'Takanotsume'. The assembly consists of 12 pseudomolecules, which corresponds to the basic chromosome number of C. annuum, and is 3,058.5 Mb in size, spanning 97.0% of the estimated genome size. A total of 34,324 high-confidence genes were predicted in the genome, and 83.4% of the genome assembly was occupied by repetitive sequences. 
Comparative genomics of linked-read sequencing-derived de novo genome assemblies of two Capsicum chinense lines and whole-genome resequencing analysis of Capsicum species revealed not only nucleotide sequence variations but also genome structure variations (i.e. chromosomal rearrangements and transposon-insertion polymorphisms) between 'Takanotsume' and its relatives. Overall, the genome sequence data generated in this study will accelerate the pan-genomics and breeding of Capsicum, and facilitate the dissection of genetic mechanisms underlying the agronomically important traits of 'Takanotsume'.}, } @article {pmid36558824, year = {2022}, author = {Xia, F and Cheng, J and Jiang, M and Wang, Z and Wen, Z and Wang, M and Ren, J and Zhuge, X}, title = {Genomics Analysis to Identify Multiple Genetic Determinants That Drive the Global Transmission of the Pandemic ST95 Lineage of Extraintestinal Pathogenic Escherichia coli (ExPEC).}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {12}, pages = {}, pmid = {36558824}, issn = {2076-0817}, support = {BE2022329//Jiangsu Province Key Research and Development Program (Modern Agriculture) Project/ ; 32172855//National Natural Science Foundation of China/ ; }, abstract = {Extraintestinal pathogenic Escherichia coli (ExPEC) is a pathogen that causes host extraintestinal diseases. The ST95 E. coli lineage is one of the dominant ExPEC lineages in humans and poultry. In this study, we took advantage of extensive E. coli genomes available through public open-access databases to construct a detailed understanding of the phylogeny and evolution of ST95. We used a high variability of accessory genomes to highlight the diversity and dynamic traits of ST95. Isolates from diverse hosts and geographic sources were randomly located on the phylogenetic tree, which suggested that there is no host specificity for ST95. The time-scaled phylogeny showed that ST95 is an ancient and long-lasting lineage. 
The virulence genes, resistance genes, and pathogenicity islands (PAIs) were characterized in ST95 pan-genomes to provide novel insights into the pathogenicity and multidrug resistance (MDR) genotypes. We found that a pool of large plasmids drives virulence and MDR. Based on the unique genes in the ST95 pan-genome, we designed a novel multiplex PCR reaction to rapidly detect ST95. Overall, our study addressed a gap in the current understanding of ST95 ExPEC genomes, with significant implications for recognizing the success and spread of ST95.}, } @article {pmid36558765, year = {2022}, author = {Lu, Q and Zhu, X and Long, Q and Yi, X and Yang, A and Long, X and Cao, D}, title = {Comparative Genomics Reveal the Utilization Ability of Variable Carbohydrates as Key Genetic Features of Listeria Pathogens in Their Pathogenic Lifestyles.}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {12}, pages = {}, pmid = {36558765}, issn = {2076-0817}, support = {baike202236, baike202235//Baise Science and Technology Plan Project/ ; Nos. 81860489//National Natural Science Foundation of China/ ; Nos. AD19245174//the Science-Technology Program of Guangxi/ ; }, abstract = {BACKGROUND: L. monocytogenes and L. ivanovii, the only two pathogens of Listeria, can survive in various environments, having different pathogenic characteristics. However, the genetic basis of their excellent adaptability and differences in pathogenicity has still not been completely elucidated.

METHODS: We performed a comparative genomic analysis based on 275 L. monocytogenes, 10 L. ivanovii, and 22 non-pathogenic Listeria strains.

RESULTS: Core/pan-genome analysis revealed that 975 gene families were conserved in all the studied strains. Additionally, 204, 242, and 756 gene families existed uniquely in L. monocytogenes, L. ivanovii, and both, respectively. Functional annotation partially verified that these unique gene families were closely related to their adaptability and pathogenicity. Moreover, the protein-protein interaction (PPI) network analysis of these unique gene sets showed that plenty of carbohydrate transport systems and energy metabolism enzymes were clustered in the networks. Interestingly, ethanolamine-metabolic-process-related proteins were significantly enriched in the PPI network of the unique genes of the Listeria pathogens, which can be understood as a determining factor of their pathogenicity.

CONCLUSIONS: The utilization capacity of multiple carbon sources of Listeria pathogens, especially ethanolamine, is the key genetic basis for their ability to adapt to various environments and pathogenic lifestyles.}, } @article {pmid36557654, year = {2022}, author = {Vázquez-Sánchez, DA and Grillo, S and Carrera-Salinas, A and González-Díaz, A and Cuervo, G and Grau, I and Camoez, M and Martí, S and Berbel, D and Tubau, F and Ardanuy, C and Pujol, M and Càmara, J and Domínguez, MÁ}, title = {Molecular Epidemiology, Antimicrobial Susceptibility, and Clinical Features of Methicillin-Resistant Staphylococcus aureus Bloodstream Infections over 30 Years in Barcelona, Spain (1990-2019).}, journal = {Microorganisms}, volume = {10}, number = {12}, pages = {}, pmid = {36557654}, issn = {2076-2607}, support = {PI16/01382//Instituto de Salud Carlos III/ ; CIBERES-CB06/06/0037//Centro de Investigación Biomédica en Red de Enfermedades Respiratorias/ ; CIBERINFEC-CB21/13/00009//Centro de Investigación Biomédica en Red de Enfermedades Infecciosas/ ; FPU16/02202//Ministerio de Educación Cultura y Deporte/ ; CP19/00096//Instituto de Salud Carlos III/ ; }, abstract = {Methicillin-resistant Staphylococcus aureus bloodstream infections (MRSA-BSI) are a significant cause of mortality. We analysed the evolution of the molecular and clinical epidemiology of MRSA-BSI (n = 784) in adult patients (Barcelona, 1990−2019). Isolates were tested for antimicrobial susceptibility and genotyped (PFGE), and a selection was sequenced (WGS) to characterise the pangenome and mechanisms underlying antimicrobial resistance. Increases in patient age (60 to 71 years), comorbidities (Charlson’s index > 2, 10% to 94%), community-onset healthcare-associated acquisition (9% to 60%), and 30-day mortality (28% to 36%) were observed during the 1990−1995 and 2014−2019 periods. The proportion of catheter-related BSIs fell from 57% to 20%. 
Current MRSA-BSIs are caused by CC5-IV and an upward trend of CC8-IV and CC22-IV clones. CC5 and CC8 had the lowest core genome proportions. Antimicrobial resistance rates fell, and only ciprofloxacin, tobramycin, and erythromycin remained high (>50%) due to GyrA/GrlA changes, the presence of aminoglycoside-modifying enzymes (AAC(6′)-Ie-APH(2″)-Ia and ANT(4′)-Ia), and mph(C)/msr(A) or erm (C) genes. Two CC22-IV strains showed daptomycin resistance (MprF substitutions). MRSA-BSI has become healthcare-associated, affecting elderly patients with comorbidities and causing high mortality rates. Clonal replacement with CC5-IV and CC8-IV clones resulted in lower antimicrobial resistance rates. The increased frequency of the successful CC22-IV, associated with daptomycin resistance, should be monitored.}, } @article {pmid36553557, year = {2022}, author = {Wang, L and Zhou, F and Zhou, J and Harvey, PR and Yu, H and Zhang, G and Zhang, X}, title = {Genomic Analysis of Pseudomonas asiatica JP233: An Efficient Phosphate-Solubilizing Bacterium.}, journal = {Genes}, volume = {13}, number = {12}, pages = {}, pmid = {36553557}, issn = {2073-4425}, mesh = {*Phosphates/metabolism ; Phylogeny ; *Pseudomonas ; Genomics ; }, abstract = {The bacterium Pseudomonas sp. strain JP233 has been reported to efficiently solubilize sparingly soluble inorganic phosphate, promote plant growth and significantly reduce phosphorus (P) leaching loss from soil. The production of 2-keto gluconic acid (2KGA) by strain JP233 was identified as the main active metabolite responsible for phosphate solubilization. However, the genetic basis of phosphate solubilization and plant-growth promotion remained unclear. As a result, the genome of JP233 was sequenced and analyzed in this study. The JP233 genome consists of a circular chromosome with a size of 5,617,746 bp and a GC content of 62.86%. No plasmids were detected in the genome. There were 5097 protein-coding sequences (CDSs) predicted in the genome. 
Phylogenetic analyses based on genomes of related Pseudomonas spp. identified strain JP233 as Pseudomonas asiatica. Comparative pangenomic analysis among 9 P. asiatica strains identified 4080 core gene clusters and 111 singleton genes present only in JP233. Genes associated with 2KGA production detected in strain JP233 included those encoding glucose dehydrogenase, pyrroloquinoline quinone and gluconate dehydrogenase. Genes associated with mechanisms of plant-growth promotion and nutrient acquisition detected in JP233 included those involved in IAA biosynthesis, ethylene catabolism and siderophore production. Numerous genes associated with other properties beneficial to plant growth were also detected in JP233, including those involved in production of acetoin, 2,3-butanediol, trehalose, and resistance to heavy metals. This study provides the genetic basis to elucidate the plant-growth promoting and bio-remediation properties of strain JP233 and its potential applications in agriculture and industry.}, } @article {pmid36551744, year = {2022}, author = {Alturki, NA and Mashraqi, MM and Jalal, K and Khan, K and Basharat, Z and Alzamami, A}, title = {Therapeutic Target Identification and Inhibitor Screening against Riboflavin Synthase of Colorectal Cancer Associated Fusobacterium nucleatum.}, journal = {Cancers}, volume = {14}, number = {24}, pages = {}, pmid = {36551744}, issn = {2072-6694}, support = {NA//Shaqra University/ ; }, abstract = {Colorectal cancer (CRC) ranks third among all cancers in terms of prevalence. There is growing evidence that gut microbiota has a role in the development of colorectal cancer. Fusobacterium nucleatum is overrepresented in the gastrointestinal tract and tumor microenvironment of patients with CRC. This suggests the role of F. nucleatum as a potential risk factor in the development of CRC. Hence, we aimed to explore whole genomes of F. 
nucleatum strains related to CRC to predict potential therapeutic markers through a pan-genome integrated subtractive genomics approach. In the current study, we identified 538 proteins as essential for F. nucleatum survival, 209 non-homologous to a human host, and 12 as drug targets. Eventually, riboflavin synthase (RiS) was selected as a therapeutic target for further processing. Three different inhibitor libraries of lead-like natural products, i.e., cyanobactins (n = 237), streptomycins (n = 607), and marine bacterial secondary metabolites (n = 1226) were screened against it. After the structure-based study, three compounds, i.e., CMNPD3609 (−7.63) > Malyngamide V (−7.03) > ZINC06804365 (−7.01) were prioritized as potential inhibitors of F. nucleatum. Additionally, the stability and flexibility of these compounds bound to RiS were determined via a molecular dynamics simulation of 50 ns. Results revealed the stability of these compounds within the binding pocket, after 5 ns. ADMET profiling showed compounds as drug-like, non-permeable to the blood brain barrier, non-toxic, and HIA permeable. 
Pan-genomics mediated drug target identification and the virtual screening of inhibitors is the preliminary step towards inhibition of this pathogenic oncobacterium and we suggest mouse model experiments to validate our findings.}, } @article {pmid36550124, year = {2022}, author = {Vaughn, JN and Branham, SE and Abernathy, B and Hulse-Kemp, AM and Rivers, AR and Levi, A and Wechter, WP}, title = {Graph-based pangenomics maximizes genotyping density and reveals structural impacts on fungal resistance in melon.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {7897}, pmid = {36550124}, issn = {2041-1723}, mesh = {Genotype ; *Cucurbitaceae/genetics ; DNA Copy Number Variations ; Plant Breeding ; Quantitative Trait Loci/genetics ; *Cucumis melo/genetics/microbiology ; }, abstract = {The genomic sequences segregating in experimental populations are often highly divergent from the community reference and from one another. Such divergence is problematic under various short-read-based genotyping strategies. In addition, large structural differences are often invisible despite being strong candidates for causal variation. These issues are exacerbated in specialty crop breeding programs with fewer, lower-quality sequence resources. Here, we examine the benefits of complete genomic information, based on long-read assemblies, in a biparental mapping experiment segregating at numerous disease resistance loci in the non-model crop, melon (Cucumis melo). We find that a graph-based approach, which uses both parental genomes, results in 19% more variants callable across the population and raw allele calls with a 2 to 3-fold error-rate reduction, even relative to single reference approaches using a parent genome. We show that structural variation has played a substantial role in shaping two Fusarium wilt resistance loci with known causal genes. 
We also report on the genetics of powdery mildew resistance, where copy number variation and local recombination suppression are directly interpretable via parental genome alignments. Benefits observed, even in this low-resolution biparental experiment, will inevitably be amplified in more complex populations.}, } @article {pmid36547858, year = {2023}, author = {Sreya, P and Suresh, G and Rai, A and Ria, B and Vighnesh, L and Agre, VC and Jagadeeshwari, U and Sasikala, C and Ramana, CV}, title = {Revisiting the taxonomy of the genus Rhodopirellula with the proposal for reclassification of the genus to Rhodopirellula sensu stricto, Aporhodopirellula gen. nov., Allorhodopirellula gen. nov. and Neorhodopirellula gen. nov.}, journal = {Antonie van Leeuwenhoek}, volume = {116}, number = {3}, pages = {243-264}, pmid = {36547858}, issn = {1572-9699}, mesh = {Sequence Analysis, DNA ; RNA, Ribosomal, 16S/genetics ; Phylogeny ; *Bacteria/genetics ; *DNA ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; Fatty Acids/chemistry ; }, abstract = {The current genus Rhodopirellula consists of marine bacteria which belong to the family Pirellulaceae of the phylum Planctomycetota. Members of the genus Rhodopirellula are aerobic, mesophiles and chemoheterotrophs. The here conducted analysis built on 16S rRNA gene sequence and multi-locus sequence analysis based phylogenomic trees suggested that the genus is subdivided into four clades. Existing Rhodopirellula species were studied extensively based on phenotypic, genomic and chemotaxonomic parameters. The heterogeneity was further confirmed by overall genome-related indices (OGRI) including digital DNA-DNA hybridization (dDDH), average nucleotide identity (ANI), average amino acid identity (AAI), and percentage of conserved proteins (POCP). AAI and POCP values between the clades of the genus Rhodopirellula were 62.2-69.6% and 49.5-62.5%, respectively. 
Comparative genomic approaches like pan-genome analysis and conserved signature indels (CSIs) also support the division of the clades. The genomic incoherence of the members of the genus is further supported by variations in phenotypic characteristics. Thus, with the here applied integrated comparative genomic and polyphasic approaches, we propose the reclassification of the genus Rhodopirellula to three new genera: Aporhodopirellula gen. nov., Allorhodopirellula gen. nov., and Neorhodopirellula gen. nov.}, } @article {pmid36547571, year = {2022}, author = {Bao, J and Wang, Z and Chen, M and Chen, S and Chen, X and Xie, J and Tang, W and Zheng, H and Wang, Z}, title = {Pan-Genomics Reveals a New Variation Pattern of Secreted Proteins in Pyricularia oryzae.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {12}, pages = {}, pmid = {36547571}, issn = {2309-608X}, support = {U1805232//National Natural Science Foundation of China/ ; 32270078//National Natural Science Foundation of China/ ; 32172365//National Natural Science Foundation of China/ ; 32001976//National Natural Science Foundation of China/ ; }, abstract = {(1) Background: Pyricularia oryzae, the causal agent of rice blast disease, is one of the major rice pathogens. The complex population structure of P. oryzae facilitates the rapid virulence variations, which make the blast disease a serious challenge for global food security. There is a large body of existing genomics research on P. oryzae, however the population structure at the pan-genome level is not clear, and the mechanism of genetic divergence and virulence variations of different sub-populations is also unknown. (2) Methods: Based on the genome data published in the NCBI, we constructed a pan-genome database of P. oryzae, which consisted of 156 strains (117 isolated from rice and 39 isolated from other hosts). 
(3) Results: The pan-genome contained a total of 24,100 genes (12,005 novel genes absent in the reference genome 70-15), including 16,911 (~70%) core genes (population frequency ≥95%) and 1378 (~5%) strain-specific genes (population frequency ≤5%). Gene presence-absence variation (PAV) based clustering analysis of the population structure of P. oryzae revealed four subgroups (three from rice and one from other hosts). Interestingly, the cloned avirulence genes and conventional secreted proteins (SPs, with signal peptides) were enriched in the high-frequency regions and significantly associated with transposable elements (TEs), while the unconventional SPs (without signal peptides) were enriched in the low-frequency regions and not associated significantly with TEs. This pan-genome will expand the breadth and depth of the rice blast fungus reference genome, and also serve as a new blueprint for scientists to further study the pathogenic mechanism and virulence variation of the rice blast fungus.}, } @article {pmid36544084, year = {2022}, author = {Morey-León, G and Andrade-Molina, D and Fernández-Cadena, JC and Berná, L}, title = {Comparative genomics of drug-resistant strains of Mycobacterium tuberculosis in Ecuador.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {844}, pmid = {36544084}, issn = {1471-2164}, support = {FCI-016-2017//University of Guayaquil/ ; SNI//Agencia Nacional de Investigación e Innovación/ ; }, mesh = {Male ; Humans ; Female ; *Mycobacterium tuberculosis ; Antitubercular Agents/pharmacology/therapeutic use ; *Tuberculosis, Multidrug-Resistant/epidemiology/microbiology ; Ecuador/epidemiology ; Phylogeny ; Mutation ; Microbial Sensitivity Tests ; *Tuberculosis/epidemiology/drug therapy ; Genomics ; Fluoroquinolones ; Drug Resistance, Multiple, Bacterial/genetics ; }, abstract = {BACKGROUND: Tuberculosis is a serious infectious disease affecting millions of people. 
In spite of efforts to reduce the disease, increasing antibiotic resistance has contributed to its persistence among the top 10 causes of death worldwide. In fact, the increase in cases of multidrug (MDR) and extreme drug resistance (XDR) worldwide remains the main challenge for tuberculosis control. Whole genome sequencing is a powerful tool for predicting drug resistance-related variants, studying lineages, tracking transmission, and defining outbreaks. This study presents the identification and characterization of resistant clinical isolates of Mycobacterium tuberculosis including a phylogenetic and molecular resistance profile study by sequencing the complete genome of 24 strains from different provinces of Ecuador.

RESULTS: Genomic sequencing was used to identify the variants causing resistance. A total of 15/21 isolates were identified as MDR, 4/21 as pre-XDR and 2/21 as XDR, with three isolates discarded due to low quality; the main sub-lineage was LAM (61.9%) and Haarlem (19%) but clades X, T and S were identified. Of the six pre-XDR and XDR strains, it is noteworthy that five come from females; four come from the LAM sub-lineage and two correspond to the X-class sub-lineage. A core genome of 3,750 genes, distributed in 295 subsystems, was determined. Among these, 64 proteins related to virulence and implicated in the pathogenicity of M. tuberculosis and 66 possible pharmacological targets stand out. Most variants result in nonsynonymous amino acid changes and the most frequent genotypes were identified as conferring resistance to rifampicin, isoniazid, ethambutol, para-aminosalicylic acid and streptomycin. However, an increase in the resistance to fluoroquinolones was detected.

CONCLUSION: This work shows for the first time the variability of circulating resistant strains between men and women in Ecuador, highlighting the usefulness of genomic sequencing for the identification of emerging resistance. In this regard, we found an increase in fluoroquinolone resistance. Further sampling effort is needed to determine the total variability and associations with the metadata obtained to generate better health policies.}, } @article {pmid36539044, year = {2023}, author = {Lima, A and Carolina Barbosa Caetano, A and Hurtado Castillo, R and Gonçalves Dos Santos, R and Lucas Neres Rodrigues, D and de Jesus Sousa, T and Kato, RB and Vinicius Canário Viana, M and Cybelle Pinto Gomide, A and Figueira Aburjaile, F and Tiwari, S and Jaiswal, A and Gala-García, A and Seyffert, N and Luiz de Paula Castro, T and Brenig, B and Matiuzzi da Costa, M and Maria Seles Dorneles, E and Le Loir, Y and Azevedo, V}, title = {Comparative genomic analysis of ovine and other host associated isolates of Staphylococcus aureus exhibit the important role of mobile genetic elements and virulence factors in host adaptation.}, journal = {Gene}, volume = {855}, number = {}, pages = {147131}, doi = {10.1016/j.gene.2022.147131}, pmid = {36539044}, issn = {1879-0038}, mesh = {Female ; Animals ; Cattle ; Sheep/genetics ; Humans ; Swine ; Virulence Factors/genetics ; Staphylococcus aureus/genetics ; Host Adaptation ; *Staphylococcal Infections/genetics/veterinary/microbiology ; Ruminants/genetics ; Genomics ; Interspersed Repetitive Sequences ; *Mastitis, Bovine/genetics/microbiology ; }, abstract = {Staphylococcus aureus is the main etiological agent of mastitis in small ruminants worldwide. This disease has a difficult cure and possible relapse, leading to significant economic losses in production, milk quality and livestock. This study performed comparative genomic analyses between 73 S. aureus genomes from different hosts (human, bovine, pig and others). 
This work isolated and sequenced 12 of these genomes from ovine. This study contributes to the knowledge of genomic specialization and the role of specific genes in establishing infection in ovine mastitis-associated S. aureus. The genomes of S. aureus isolated from sheep maintained a higher representation when grouped with clonal complexes 130 and 133. The genomes showed high genetic similarity, the species pan-genome consisting of 4200 genes (central = 2008, accessory = 1559 and unique = 634). Among these, 277 unique genes were related to the genomes isolated from sheep, with 39.6 % as hypothetical proteins, 6.4 % as phages, 6.4 % as toxins, 2.9 % as transporters, and 44.7 % as related to other proteins. Furthermore, at the pathogen level, they showed 80 genes associated with virulence factors and 19 with antibiotic resistance shared in almost all isolates. Although S. aureus isolated from ovine showed susceptibility to antimicrobials in vitro, ten genes were predicted to be associated with antibiotic inactivation and efflux pump, suggesting resistance to gentamicin and penicillin. 
This work may contribute to identifying genes acquired by horizontal transfer and their role in host adaptation, virulence, bacterial resistance, and characterization of strains affecting ovine.}, } @article {pmid36537824, year = {2023}, author = {Simoni, S and Leoni, F and Veschetti, L and Malerba, G and Carelli, M and Lleò, MM and Brenciani, A and Morroni, G and Giovanetti, E and Rocchegiani, E and Barchiesi, F and Vignaroli, C}, title = {The Emerging Nosocomial Pathogen Klebsiella michiganensis: Genetic Analysis of a KPC-3 Producing Strain Isolated from Venus Clam.}, journal = {Microbiology spectrum}, volume = {11}, number = {1}, pages = {e0423522}, pmid = {36537824}, issn = {2165-0497}, mesh = {Humans ; Anti-Bacterial Agents/pharmacology ; Phylogeny ; *Cross Infection ; *Klebsiella Infections/epidemiology ; Drug Resistance, Multiple, Bacterial/genetics ; Plasmids/genetics ; Klebsiella pneumoniae ; beta-Lactamases/genetics ; Carbapenems/pharmacology ; Hospitals ; Bacterial Proteins/genetics ; Microbial Sensitivity Tests ; }, abstract = {The recovery and characterization of a multidrug-resistant, KPC-3-producing Klebsiella michiganensis that was obtained from Venus clam samples is reported in this study. A whole-genome sequencing (WGS) analysis using Illumina and Nanopore technologies of the K. michiganensis 23999A2 isolate revealed that the strain belonged to the new sequence type 382 (ST382) and carried seven plasmid replicon sequences, including four IncF type plasmids (FII, FIIY, FIIk, and FIB), one IncHI1 plasmid, and two Col plasmids. The FIB and FIIk plasmids showed high homology to each other and to multireplicon pKpQIL-like plasmids that are found in epidemic KPC-K. pneumoniae clones worldwide. The strain carried multiple β-lactamase genes on the IncF plasmids: blaOXA-9 and blaTEM-1A on FIB, blaKPC-3 inserted in a Tn4401a on FIIK, and blaSHV-12 on FIIY. The IncHI1-ST11 harbored no resistance gene. 
The curing of the strain caused the loss of all of the bla genes and a rearrangement of the IncF plasmids. Conjugal transfer of the blaOXA-9, blaTEM-1A and blaKPC-3 genes occurred at a frequency of $5 \times 10^{-7}$, using K. quasipneumoniae as a recipient, and all of the bla genes were transferred through a pKpQIL that originated from the recombination of the FIB and FIIk plasmids of the donor. A comparison with 31 K. michiganensis genomes that are available in the NCBI database showed that the closest phylogenetic relatives of K. michiganensis 23999A2 are an environmental isolate from soil in South Korea and a clinical isolate from human sputum in Japan. Finally, a pan-genome analysis showed a large accessory genome of the strain as well as the great genomic plasticity of the K. michiganensis species. IMPORTANCE Klebsiella michiganensis is an emerging nosocomial pathogen, and, so far, few studies describe isolates of clinical origin in the environment. This study contributes to the understanding of how the dissemination of carbapenem-resistance outside the hospital setting may be related to the circulation of pKpQIL-like plasmids that are derived from epidemic Klebsiella pneumoniae strains. The recovery of a carbapenem-resistant isolate in clams is of great concern, as bivalves could represent vehicles of transmission of pathogens and resistance genes to humans via the food chain. The study demonstrates the plasticity of K. michiganensis genome, which is probably useful to multiple environment adaptation and to the evolution of the species.}, } @article {pmid36536862, year = {2022}, author = {Cai, Q and Huang, Y and Zhou, L and Hu, N and Liu, Y and Guo, F and Liu, Q and Huang, X and Zhang, Y and Zeng, L}, title = {A Complete Genome of Nocardia terpenica NC_YFY_NT001 and Pan-Genomic Analysis Based on Different Sources of Nocardia spp. 
Isolates Reveal Possibly Host-Related Virulence Factors.}, journal = {Infection and drug resistance}, volume = {15}, number = {}, pages = {7259-7270}, pmid = {36536862}, issn = {1178-6973}, abstract = {OBJECTIVE: We aimed to identify the possible virulence genes associated with Nocardia NC_YFY_NT001 isolated by ourselves and other Nocardia spp.

METHODS: The genome of Nocardia terpenica NC_YFY_NT001 was completed by using PacBio and Illumina platforms. A pan-genomic analysis was applied to selected complete Nocardia genomes.

RESULTS: Nocardia terpenica NC_YFY_NT001 can cause death in healthy mice following tail intravenous injection. The genome of NT001 has one circular chromosome of 8,850,000 bp and one circular plasmid of 70,000 bp with ~68% GC content. The chromosome and plasmid encode 7914 and 80 proteins, respectively. Furthermore, a pan-genomic analysis showed a total of 45,825 gene clusters, of which 304 core, 21,045 shell and 24,476 cloud gene clusters were classified using specific parameters. In addition, we found that catalases were more abundant in human isolates. Furthermore, we also found no significant differences in the MCE proteins between different strains from different sources. The pan-genomic analysis also showed that 67 genes could only be found in humoral isolates. ReX3 and DUF853 domain protein were found in all eight human isolates. The composition of unique genes in humoral isolate genomes indicated that the transcriptional regulators may be important when Nocardia invades the host, which allows them to survive in the new ecological system.

CONCLUSION: In this study, we confirmed that NT001 could cause infected animal death, and identified many possible virulence factors for our future studies. This study also provides new insight for our further study on Nocardia virulence mechanisms.}, } @article {pmid36536253, year = {2023}, author = {Sohn, JI and Choi, MH and Yi, D and Menon, VA and Kim, YJ and Lee, J and Park, JW and Kyung, S and Shin, SH and Na, B and Joung, JG and Ju, YS and Yeom, MS and Koh, Y and Yoon, SS and Baek, D and Kim, TM and Nam, JW}, title = {Ultrafast prediction of somatic structural variations by filtering out reads matched to pan-genome k-mer sets.}, journal = {Nature biomedical engineering}, volume = {7}, number = {7}, pages = {853-866}, pmid = {36536253}, issn = {2157-846X}, mesh = {Humans ; *High-Throughput Nucleotide Sequencing/methods ; Genome ; Sequence Analysis, DNA/methods ; *Neoplasms ; }, abstract = {Variant callers typically produce massive numbers of false positives for structural variations, such as cancer-relevant copy-number alterations and fusion genes resulting from genome rearrangements. Here we describe an ultrafast and accurate detector of somatic structural variations that reduces read-mapping costs by filtering out reads matched to pan-genome k-mer sets. The detector, which we named ETCHING (for efficient detection of chromosomal rearrangements and fusion genes), reduces the number of false positives by leveraging machine-learning classifiers trained with six breakend-related features (clipped-read count, split-reads count, supporting paired-end read count, average mapping quality, depth difference and total length of clipped bases). When benchmarked against six callers on reference cell-free DNA, validated biomarkers of structural variants, matched tumour and normal whole genomes, and tumour-only targeted sequencing datasets, ETCHING was 11-fold faster than the second-fastest structural-variant caller at comparable performance and memory use. 
The speed and accuracy of ETCHING may aid large-scale genome projects and facilitate practical implementations in precision medicine.}, } @article {pmid36534203, year = {2022}, author = {Jesus, HNR and Ramos, JN and Rocha, DJPG and Alves, DA and Silva, CS and Cruz, JVO and Vieira, VV and Souza, C and Santos, LS and Navas, J and Ramos, RTJ and Azevedo, V and Aguiar, ERGR and Mattos-Guaraldi, AL and Pacheco, LGC}, title = {The pan-genome of the emerging multidrug-resistant pathogen Corynebacterium striatum.}, journal = {Functional \& integrative genomics}, volume = {23}, number = {1}, pages = {5}, pmid = {36534203}, issn = {1438-7948}, support = {BOL0505/2018//Fundação de Amparo à Pesquisa do Estado da Bahia/ ; BOL0505/2018//Fundação de Amparo à Pesquisa do Estado da Bahia/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CAPES-PROCAD 071/2013//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; CNPq Nº 09/2018//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; CNPq Nº 09/2018//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; CNPq Nº 09/2018//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; MCT/FINEP/CT-INFRA01/2013//Financiadora de Estudos e Projetos/ ; }, mesh = {Humans ; *Corynebacterium ; *Anti-Bacterial Agents ; Phenotype ; Virulence Factors/genetics ; Drug Resistance, Multiple, Bacterial/genetics ; Microbial Sensitivity Tests ; }, abstract = {Corynebacterium striatum, a common constituent of the human skin microbiome, is now considered an emerging multidrug-resistant pathogen of immunocompromised and chronically ill patients. 
However, little is known about the molecular mechanisms in the transition from colonization to the multidrug-resistant (MDR) invasive phenotype in clinical isolates. This study performed a comprehensive pan-genomic analysis of C. striatum, including isolates from "normal skin microbiome" and from MDR infections, to gain insights into genetic factors contributing to pathogenicity and multidrug resistance in this species. For this, three novel genome sequences were obtained from clinical isolates of C. striatum of patients from Brazil, and other 24 complete or draft C. striatum genomes were retrieved from GenBank, including the ATCC6940 isolate from the Human Microbiome Project. Analysis of C. striatum strains demonstrated the presence of an open pan-genome (α = 0.852803) containing 3816 gene families, including 15 antimicrobial resistance (AMR) genes and 32 putative virulence factors. The core and accessory genomes included 1297 and 1307 genes, respectively. The identified AMR genes are primarily associated with resistance to aminoglycosides and tetracyclines. Of these, 66.6% are present in genomic islands, and four AMR genes, including aac(6')-ib7, are located in a class 1-integron. In conclusion, our data indicated that C. 
striatum possesses genomic characteristics favorable to the invasive phenotype, with high genomic plasticity, a robust genetic arsenal for iron acquisition, and important virulence determinants and AMR genes present in mobile genetic elements.}, } @article {pmid36534120, year = {2023}, author = {Gui, S and Martinez-Rivas, FJ and Wen, W and Meng, M and Yan, J and Usadel, B and Fernie, AR}, title = {Going broad and deep: sequencing-driven insights into plant physiology, evolution, and crop domestication.}, journal = {The Plant journal : for cell and molecular biology}, volume = {113}, number = {3}, pages = {446-459}, doi = {10.1111/tpj.16070}, pmid = {36534120}, issn = {1365-313X}, mesh = {*Domestication ; *Genome-Wide Association Study ; Genome, Plant/genetics ; Genomics ; Plants ; }, abstract = {Deep sequencing is a term that has become embedded in the plant genomic literature in recent years and with good reason. A torrent of (largely) high-quality genomic and transcriptomic data has been collected and most of this has been publicly released. Indeed, almost 1000 plant genomes have been reported (www.plabipd.de) and the 2000 Plant Transcriptomes Project has long been completed. The EarthBioGenome project will dwarf even these milestones. That said, massive progress in understanding plant physiology, evolution, and crop domestication has been made by sequencing broadly (across a species) as well as deeply (within a single individual). We will outline the current state of the art in genome and transcriptome sequencing before we briefly review the most visible of these broad approaches, namely genome-wide association and transcriptome-wide association studies, as well as the compilation of pangenomes. This will include both (i) the most commonly used methods reliant on single nucleotide polymorphisms and short InDels and (ii) more recent examples which consider structural variants. 
We will subsequently present case studies exemplifying how their application has brought insight into either plant physiology or evolution and crop domestication. Finally, we will provide conclusions and an outlook as to the perspective for the extension of such approaches to different species, tissues, and biological processes.}, } @article {pmid36533928, year = {2023}, author = {Wang, Z and Xu, S and Zheng, X and Zheng, X and Liu, M and Guo, G and Yu, Y and Han, X and Liu, Y and Wang, K and Zhang, W}, title = {Identification of Subunits for Novel Universal Vaccines against Three Predominant Serogroups and the Emerging O145 among Avian Pathogenic Escherichia coli by Pan-RV Pipeline.}, journal = {Applied and environmental microbiology}, volume = {89}, number = {1}, pages = {e0106122}, pmid = {36533928}, issn = {1098-5336}, mesh = {Animals ; Escherichia coli/genetics ; Serogroup ; *Escherichia coli Infections/prevention & control/veterinary ; Poultry ; Bacterial Vaccines ; *Escherichia coli Vaccines ; *Poultry Diseases/prevention & control/microbiology ; Chickens ; }, abstract = {Avian pathogenic Escherichia coli, a causative agent of avian colibacillosis, has been causing serious economic losses in the poultry industry. The increase in multidrug-resistant isolates and the complexity of the serotypes of this pathogen, especially the recently reported emergence of a newly predominant serogroup of O145, make the control of this disease difficult. To address this challenge, a high-throughput screening approach, called Pan-RV (Reverse vaccinology based on pangenome analysis), is proposed to search for universal protective antigens against the three traditional serogroups and the newly emerged O145. Using this approach, a total of 61 proteins regarded as probable antigens against the four important serogroups were screened from the core genome of 127 Avian pathogenic Escherichia coli (APEC) genomes, and six were verified by Western blots using antisera. 
Overall, our research will provide a foundation for the development of an APEC subunit vaccine against avian colibacillosis. Given the exponential growth of whole-genome sequencing (WGS) data, our Pan-RV pipeline will make screening of bacterial vaccine candidates inexpensive, rapid, and efficient. IMPORTANCE With the emergence of drug resistance and the newly predominant serogroup O145, the control of Avian pathogenic Escherichia coli is facing a serious challenge; an efficient immunological method is urgently needed. Here, for the first time, we propose a high-throughput screening approach to search for universal protective antigens against the three traditional serogroups and the newly emerged O145. Importantly, using this approach, a total of 61 proteins regarded as probable antigens against the four important serogroups were screened, and three were shown to be immunoreactive with all antisera (covering the four serogroups), thereby providing a foundation for the development of APEC subunit vaccines against avian colibacillosis. Further, our Pan-RV pipeline will provide immunological control strategies for pathogens with complex and variable genetic backgrounds such as Escherichia coli and will make screening of bacterial vaccine candidates more inexpensive, rapid, and efficient.}, } @article {pmid36533266, year = {2022}, author = {Usadel, B}, title = {Solanaceae pangenomes are coming of graphical age to bring heritability back.}, journal = {aBIOTECH}, volume = {3}, number = {4}, pages = {233-236}, pmid = {36533266}, issn = {2662-1738}, abstract = {Two recent articles describe a pangenome of potato and a graph-based pangenome for tomato, respectively. 
The latter improves our understanding of the tomato genomics architecture even further and the use of this graph-based pangenome versus a single reference dramatically improves heritability in tomato.}, } @article {pmid36532462, year = {2022}, author = {Cohn, AR and Orsi, RH and Carroll, LM and Liao, J and Wiedmann, M and Cheng, RA}, title = {Salmonella enterica serovar Cerro displays a phylogenetic structure and genomic features consistent with virulence attenuation and adaptation to cattle.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1005215}, pmid = {36532462}, issn = {1664-302X}, abstract = {Salmonella enterica subsp. enterica (S.) serovar Cerro is rarely isolated from human clinical cases of salmonellosis but represents the most common serovar isolated from cattle without clinical signs of illness in the United States. In this study, using a large, diverse set of 316 isolates, we utilized genomic methods to further elucidate the evolutionary history of S. Cerro and to identify genomic features associated with its apparent virulence attenuation in humans. Phylogenetic analyses showed that within this polyphyletic serovar, 98.4% of isolates (311/316) represent a monophyletic clade within section Typhi and the remaining 1.6% of isolates (5/316) form a monophyletic clade within subspecies enterica Clade A1. Of the section Typhi S. Cerro isolates, 93.2% of isolates (290/311) clustered into a large clonal clade comprised of predominantly sequence type (ST) 367 cattle and environmental isolates, while the remaining 6.8% of isolates (21/311), primarily from human clinical sources, clustered outside of this clonal clade. A tip-dated phylogeny of S. Cerro ST367 identified two major clades (I and II), one of which overwhelmingly consisted of cattle isolates that share a most recent common ancestor that existed circa 1975. Gene presence/absence and rarefaction curve analyses suggested that the pangenome of section Typhi S. 
Cerro is open, potentially reflecting the gain/loss of prophage; human isolates contained the most open pangenome, while cattle isolates had the least open pangenome. Hypothetically disrupted coding sequences (HDCs) displayed clade-specific losses of intact speC and sopA virulence genes within the large clonal S. Cerro clade, while loss of intact vgrG, araH, and vapC occurred in all section Typhi S. Cerro isolates. Further phenotypic analysis suggested that the presence of a premature stop codon in speC does not abolish ornithine decarboxylase activity in S. Cerro, likely due to the activity of the second ornithine decarboxylase encoded by speF, which remained intact in all isolates. Overall, our study identifies specific genomic features associated with S. Cerro's infrequent isolation from humans and its apparent adaptation to cattle, which has broader implications for informing our understanding of the evolutionary events facilitating host adaptation in Salmonella.}, } @article {pmid36529716, year = {2022}, author = {Cagirici, HB and Andorf, CM and Sen, TZ}, title = {Co-expression pan-network reveals genes involved in complex traits within maize pan-genome.}, journal = {BMC plant biology}, volume = {22}, number = {1}, pages = {595}, pmid = {36529716}, issn = {1471-2229}, mesh = {*Zea mays/genetics ; *Genome-Wide Association Study/methods ; Multifactorial Inheritance ; Phenotype ; Gene Regulatory Networks ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {BACKGROUND: With the advances in the high throughput next generation sequencing technologies, genome-wide association studies (GWAS) have identified a large set of variants associated with complex phenotypic traits at a very fine scale. Despite the progress in GWAS, identification of genotype-phenotype relationship remains challenging in maize due to its nature with dozens of variants controlling the same trait. 
As causal variations result in changes in expression, gene expression analyses play a pivotal role in unraveling the transcriptional regulatory mechanisms behind the phenotypes.

RESULTS: To address these challenges, we incorporated the gene expression and GWAS-driven traits to extend the knowledge of genotype-phenotype relationships and transcriptional regulatory mechanisms behind the phenotypes. We constructed a large collection of gene co-expression networks and identified more than 2 million co-expressing gene pairs in the GWAS-driven pan-network which contains all the gene-pairs in individual genomes of the nested association mapping (NAM) population. We defined four sub-categories for the pan-network: (1) core-network contains the highest represented ~ 1% of the gene-pairs, (2) near-core network contains the next highest represented 1-5% of the gene-pairs, (3) private-network contains ~ 50% of the gene pairs that are unique to individual genomes, and (4) the dispensable-network contains the remaining 50-95% of the gene-pairs in the maize pan-genome. Strikingly, the private-network contained almost all the genes in the pan-network but lacked half of the interactions. We performed gene ontology (GO) enrichment analysis for the pan-, core-, and private- networks and compared the contributions of variants overlapping with genes and promoters to the GWAS-driven pan-network.

CONCLUSIONS: Gene co-expression networks revealed meaningful information about groups of co-regulated genes that play a central role in regulatory processes. Pan-network approach enabled us to visualize the global view of the gene regulatory network for the studied system that could not be well inferred by the core-network alone.}, } @article {pmid36526963, year = {2022}, author = {Abraha, HB and Lee, JW and Kim, G and Ferdiansyah, MK and Ramesha, RM and Kim, KP}, title = {Genomic diversity and comprehensive taxonomical classification of 61 Bacillus subtilis group member infecting bacteriophages, and the identification of ortholog taxonomic signature genes.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {835}, pmid = {36526963}, issn = {1471-2164}, support = {2021R1A2C2008022//National Research Foundation of Korea/ ; 2021R1A2C2008022//National Research Foundation of Korea/ ; 2021R1A2C2008022//National Research Foundation of Korea/ ; }, mesh = {*Bacteriophages/genetics ; *Bacillus/genetics ; Bacillus subtilis/genetics ; Genomics ; Genome, Viral ; Phylogeny ; }, abstract = {BACKGROUND: Despite the applications of Bacillus subtilis group species in various sectors, limited information is available regarding their phages. Here, 61 B. subtilis group species-infecting phages (BSPs) were studied for their taxonomic classification considering the genome-size, genomic diversity, and the host, followed by the identification of orthologs taxonomic signature genes.

RESULTS: BSPs have widely ranging genome sizes that can be bunched into groups to demonstrate correlations to family and subfamily classifications. Comparative analysis re-confirmed the existing, BSPs-containing 14 genera and 21 species and displayed inter-genera similarities within existing subfamilies. Importantly, it also revealed the need for the creation of new taxonomic classifications, including 28 species, nine genera, and two subfamilies (New subfamily1 and New subfamily2) to accommodate inter-genera relatedness. Following pangenome analysis, no ortholog shared by all BSPs was identified, while orthologs, namely, the tail fibers/spike proteins and poly-gamma-glutamate hydrolase, that are shared by more than two-thirds of the BSPs were identified. More importantly, major capsid protein (MCP) type I, MCP type II, MCP type III and peptidoglycan binding proteins that are distinctive orthologs for Herelleviridae, Salasmaviridae, New subfamily1, and New subfamily2, respectively, were identified and analyzed which could serve as signatures to distinguish BSP members of the respective taxon.

CONCLUSIONS: In this study, we show the genomic diversity and propose a comprehensive classification of 61 BSPs, including the proposition for the creation of two new subfamilies, followed by the identification of orthologs taxonomic signature genes, potentially contributing to phage taxonomy.}, } @article {pmid36523157, year = {2023}, author = {Shi, J and Tian, Z and Lai, J and Huang, X}, title = {Plant pan-genomics and its applications.}, journal = {Molecular plant}, volume = {16}, number = {1}, pages = {168-186}, doi = {10.1016/j.molp.2022.12.009}, pmid = {36523157}, issn = {1752-9867}, mesh = {*Genomics ; *Genome, Plant/genetics ; Chromosome Mapping ; }, abstract = {Plant genomes are so highly diverse that a substantial proportion of genomic sequences are not shared among individuals. The variable DNA sequences, along with the conserved core sequences, compose the more sophisticated pan-genome that represents the collection of all non-redundant DNA in a species. With rapid progress in genome sequencing technologies, pan-genome research in plants is now accelerating. Here we review recent advances in plant pan-genomics, including major driving forces of structural variations that constitute the variable sequences, methodological innovations for representing the pan-genome, and major successes in constructing plant pan-genomes. We also summarize recent efforts toward decoding the remaining dark matter in telomere-to-telomere or gapless plant genomes. 
These new genome resources, which have remarkable advantages over numerous previously assembled less-than-perfect genomes, are expected to become new references for genetic studies and plant breeding.}, } @article {pmid36516689, year = {2023}, author = {Hussain, J and Cohen, M and O'Malley, CJ and Mantri, N and Li, Y and Mueller, JF and Greaves, R and Wang, X}, title = {Detections of organophosphate and pyrethroid insecticide metabolites in urine and sweat obtained from women during infrared sauna and exercise: A pilot crossover study.}, journal = {International journal of hygiene and environmental health}, volume = {248}, number = {}, pages = {114091}, doi = {10.1016/j.ijheh.2022.114091}, pmid = {36516689}, issn = {1618-131X}, mesh = {Humans ; Female ; *Insecticides/urine ; Cross-Over Studies ; Sweat/chemistry/metabolism ; Organophosphates/urine ; *Steam Bath ; *Pyrethrins ; *Pesticides/urine ; Environmental Exposure/analysis ; }, abstract = {Synthetic pesticides such as organophosphates and pyrethroids are commonly used worldwide yet the metabolic and long-term human health effects of these environmental exposures are unclear. Urinary detections of metabolites involving both classes of insecticides have been documented in various global populations. However, reports documenting similar detections in human sweat are sparse. In this study, the concentrations of four insecticide metabolites were measured using liquid chromatography coupled with tandem mass spectrometry in repeated sweat and urine collections (n = 85) from 10 women undergoing three interventions (control, infrared sauna and indoor bicycling) within a single-blinded randomised crossover trial. The Friedman test with post-hoc two-way analysis of variance, the related-samples Wilcoxon signed rank test and the Spearman's rank-order correlation test were used to analyse the results. 
Organophosphate metabolites were detected in 84.6% (22/26) and pyrethroids in 26.9% (7/26) of the collected sweat samples (pooled per individual, per intervention). Urinary concentrations of three of the four metabolites marginally increased after infrared sauna bathing: 3,5,6-trichloro-2-pyridinol (z = 2.395, p = 0.017); 3-phenoxybenzoic acid (z = 2.599, p = 0.009); and trans-3-(2,2-dichlorovinyl)-2,2-dimethylcyclopropane-1-carboxylic acid (z = 2.090, p = 0.037). Urinary 3-phenoxybenzoic acid also increased after exercise (z = 2.073, p = 0.038) and demonstrated the most temporal variability (days to weeks) of any of the urinary metabolites. Definitive sweat/urine correlations were not demonstrated. These results indicate metabolites from organophosphate and pyrethroid pesticides can be detected in human sweat and this raises intriguing questions about perspiration and its role in the metabolism and excretion of synthetic pesticides.}, } @article {pmid36515536, year = {2023}, author = {Rumball, NA and Alm, EW and McLellan, SL}, title = {Genetic Determinants of Escherichia coli Survival in Beach Sand.}, journal = {Applied and environmental microbiology}, volume = {89}, number = {1}, pages = {e0142322}, pmid = {36515536}, issn = {1098-5336}, mesh = {Animals ; Humans ; *Sand ; Escherichia coli ; Lakes ; Michigan ; *Charadriiformes ; Environmental Monitoring/methods ; Feces ; Bathing Beaches ; Water Microbiology ; }, abstract = {Escherichia coli contain a high level of genetic diversity and are generally associated with the guts of warm-blooded animals but have also been isolated from secondary habitats outside hosts. We used E. coli isolates from previous in situ microcosm experiments conducted under actual beach conditions and performed population-level genomic analysis to identify accessory genes associated with survival within the beach sand environment. E. 
coli strains capable of surviving had been selected for by seeding isolates originating from sand, sewage, and gull waste (n = 528; 176 from each source) into sand, which was sealed in microcosm chambers and buried for 45 days in the backshore beach of Lake Michigan. In the current work, survival-associated genes were identified by comparing the pangenome of viable E. coli populations at the end of the microcosm experiment with the original isolate collection and identifying loci enriched in the output samples. We found that environmental survival was associated with a wide variety of genetic factors, with the majority corresponding to metabolism enzymes and transport proteins. Of the 414 unique functions identified, most were present across E. coli phylogroups, except B2 which is often associated with human pathogens. Gene modules that were enriched in surviving populations included a betaine biosynthesis pathway, which produces an osmoprotectant, and the GABA (gamma-aminobutyrate) biosynthesis pathway, which aids in pH homeostasis and nutrient use versatility. Overall, these results demonstrate that the genetic flexibility within this species allows for survival in the environment for extended periods. IMPORTANCE Escherichia coli is commonly used as an indicator of recent fecal pollution in recreational water despite its known ability to survive in secondary environments, such as beach sand. These long-term survivors from sand reservoirs can be introduced into the water column through wave action or runoff during precipitation events, thereby impacting the perception of local water quality. Current beach monitoring methods cannot differentiate long-term environmental survivors from E. coli derived from recent fecal input, resulting in inaccurate monitoring results and unnecessary beach closures. This work identified the genetic factors that are associated with long-term survivors, providing insight into the mechanistic basis for E. 
coli accumulation in beach sand. A greater understanding of the intrinsic ability of E. coli to survive long-term and conditions that promote such survival will provide evidence of the limitations of beach water quality assessments using this indicator.}, } @article {pmid36511689, year = {2023}, author = {Dillard, LR and Glass, EM and Lewis, AL and Thomas-White, K and Papin, JA}, title = {Metabolic Network Models of the Gardnerella Pangenome Identify Key Interactions with the Vaginal Environment.}, journal = {mSystems}, volume = {8}, number = {1}, pages = {e0068922}, pmid = {36511689}, issn = {2379-5077}, support = {R01 AI114635/AI/NIAID NIH HHS/United States ; }, mesh = {Female ; Humans ; *Vaginosis, Bacterial/genetics ; Gardnerella ; Gardnerella vaginalis/genetics ; Vagina/microbiology ; Bacteria ; Metabolic Networks and Pathways/genetics ; }, abstract = {Gardnerella is the primary pathogenic bacterial genus present in the polymicrobial condition known as bacterial vaginosis (BV). Despite BV's high prevalence and associated chronic and acute women's health impacts, the Gardnerella pangenome is largely uncharacterized at both the genetic and functional metabolic levels. Here, we used genome-scale metabolic models to characterize in silico the Gardnerella pangenome metabolic content. We also assessed the metabolic functional capacity in a BV-positive cervicovaginal fluid context. The metabolic capacity varied widely across the pangenome, with 38.15% of all reactions being core to the genus, compared to 49.60% of reactions identified as being unique to a smaller subset of species. We identified 57 essential genes across the pangenome via in silico gene essentiality screens within two simulated vaginal metabolic environments. Four genes, gpsA, fas, suhB, and psd, were identified as core essential genes critical for the metabolic function of all analyzed bacterial species of the Gardnerella genus. 
Further understanding these core essential metabolic functions could inform novel therapeutic strategies to treat BV. Machine learning applied to simulated metabolic network flux distributions showed limited clustering based on the sample isolation source, which further supports the presence of extensive core metabolic functionality across this genus. These data represent the first metabolic modeling of the Gardnerella pangenome and illustrate strain-specific interactions with the vaginal metabolic environment across the pangenome. IMPORTANCE Bacterial vaginosis (BV) is the most common vaginal infection among reproductive-age women. Despite its prevalence and associated chronic and acute women's health impacts, the diverse bacteria involved in BV infection remain poorly characterized. Gardnerella is the genus of bacteria most commonly and most abundantly represented during BV. In this paper, we use metabolic models, which are a computational representation of the possible functional metabolism of an organism, to investigate metabolic conservation, gene essentiality, and pathway utilization across 110 Gardnerella strains. These models allow us to investigate in silico how strains may differ with respect to their metabolic interactions with the vaginal-host environment.}, } @article {pmid36503997, year = {2023}, author = {Chan, C and Salomé, PA}, title = {What makes a good reference? 
First steps toward a Chlamydomonas pangenome.}, journal = {The Plant cell}, volume = {35}, number = {2}, pages = {628-629}, pmid = {36503997}, issn = {1532-298X}, mesh = {*Chlamydomonas/genetics ; Genomics ; }, } @article {pmid36494615, year = {2022}, author = {Johansson, P and Säde, E and Hultman, J and Auvinen, P and Björkroth, J}, title = {Pangenome and genomic taxonomy analyses of Leuconostoc gelidum and Leuconostoc gasicomitatum.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {818}, pmid = {36494615}, issn = {1471-2164}, support = {307855//Academy of Finland/ ; NNF20OC0061239//Novo Nordisk Fonden/ ; }, mesh = {Phylogeny ; *Leuconostoc/genetics ; *DNA ; Food Microbiology ; }, abstract = {BACKGROUND: Leuconostoc gelidum and Leuconostoc gasicomitatum have dual roles in foods. They may spoil cold-stored packaged foods but can also be beneficial in kimchi fermentation. The impact in food science as well as the limited number of publicly available genomes prompted us to create pangenomes and perform genomic taxonomy analyses starting from de novo sequencing of the genomes of 37 L. gelidum/L. gasicomitatum strains from our culture collection. Our aim was also to evaluate the recently proposed change in taxonomy as well as to study the genomes of strains with different lifestyles in foods.

METHODS: We selected as diverse a set of strains as possible in terms of sources, previous genotyping results and geographical distribution, and included also 10 publicly available genomes in our analyses. We studied genomic taxonomy using pairwise average nucleotide identity (ANI) and calculation of digital DNA-DNA hybridisation (dDDH) scores. Phylogeny analyses were done using the core gene set of 1141 single-copy genes and a set of housekeeping genes commonly used for lactic acid bacteria. In addition, the pangenome and core genome sizes as well as some properties, such as acquired antimicrobial resistance (AMR), important due to the growth in foods, were analysed.

RESULTS: Genome relatedness indices and phylogenetic analyses supported the recently suggested classification that restores the taxonomic position of L. gelidum subsp. gasicomitatum back to the species level as L. gasicomitatum. Genome properties, such as size and coding potential, revealed limited intraspecies variation and showed no attribution to the source of isolation. The distribution of the unique genes between species and subspecies was not associated with the previously documented lifestyle in foods. None of the strains carried any acquired AMR genes or genes associated with any known form of virulence.

CONCLUSION: Genome-wide examination of strains confirms that the proposition to restore the taxonomic position of L. gasicomitatum is justified. It further confirms that the distribution and lifestyle of L. gelidum and L. gasicomitatum in foods have not been driven by the evolution of functional and phylogenetic diversification detectable at the genome level.}, } @article {pmid36494611, year = {2022}, author = {Guardia, AE and Wagner, A and Busalmen, JP and Di Capua, C and Cortéz, N and Beligni, MV}, title = {The draft genome of Andean Rhodopseudomonas sp. strain AZUL predicts genome plasticity and adaptation to chemical homeostasis.}, journal = {BMC microbiology}, volume = {22}, number = {1}, pages = {297}, pmid = {36494611}, issn = {1471-2180}, mesh = {*Rhodopseudomonas/genetics ; Adaptation, Physiological/genetics ; Base Sequence ; Genomics ; Acclimatization ; Phylogeny ; }, abstract = {The genus Rhodopseudomonas comprises purple non-sulfur bacteria with extremely versatile metabolisms. Characterization of several strains revealed that each is a distinct ecotype highly adapted to its specific micro-habitat. Here we present the sequencing, genomic comparison and functional annotation of AZUL, a Rhodopseudomonas strain isolated from a high altitude Andean lagoon dominated by extreme conditions and fluctuating levels of chemicals. Average nucleotide identity (ANI) analysis of 39 strains of this genus showed that the genome of AZUL is 96.2% identical to that of strain AAP120, which suggests that they belong to the same species. ANI values also show clear separation at the species level with the rest of the strains, being more closely related to R. palustris. Pangenomic analyses revealed that the genus Rhodopseudomonas has an open pangenome and that its core genome represents roughly 5 to 12% of the total gene repertoire of the genus. 
Functional annotation showed that AZUL has genes that participate in conferring genome plasticity and that, in addition to sharing the basal metabolic complexity of the genus, it is also specialized in metal and multidrug resistance and in responding to nutrient limitation. Our results also indicate that AZUL might have evolved to use some of the mechanisms involved in resistance as redox reactions for bioenergetic purposes. Most of those features are shared with strain AAP120, and mainly involve the presence of additional orthologs responsible for the mentioned processes. Altogether, our results suggest that AZUL, one of the few bacteria from its habitat with a sequenced genome, is highly adapted to the extreme and changing conditions that constitute its niche.}, } @article {pmid36479628, year = {2022}, author = {Adsit, FG and Randall, TA and Locklear, J and Kurtz, DM}, title = {The emergence of the tetrathionate reductase operon in the Escherichia coli/Shigella pan-genome.}, journal = {MicrobiologyOpen}, volume = {11}, number = {6}, pages = {e1333}, pmid = {36479628}, issn = {2045-8827}, mesh = {*Escherichia coli/genetics ; *Shigella ; Virulence Factors/genetics ; }, abstract = {Escherichia coli pathogenic variants (pathovars) are generally characterized by defined virulence traits and are susceptible to the evolution of hybridized identities due to the considerable plasticity of the E. coli genome. We have isolated a strain from a purified diet intended for research animals that further demonstrates the ability of E. coli to acquire novel genetic elements leading potentially to emergent new pathovars. Utilizing next generation sequencing to obtain a whole genome profile, we report an atypical strain of E. coli, EcoFA807-17, possessing a tetrathionate reductase (ttr) operon, which enables the utilization of tetrathionate as an electron acceptor, thus facilitating respiration in anaerobic environments such as the mammalian gut. 
The ttr operon is a potent virulence factor for several enteric pathogens, most prominently Salmonella enterica. However, the presence of chromosomally integrated tetrathionate reductase genes does not appear to have been previously reported in wild-type E. coli or Shigella. Accordingly, it is possible that the appearance of this virulence factor may signal the evolution of new mechanisms of pathogenicity in E. coli and Shigella and may potentially alter the effectiveness of existing assays using tetrathionate reductase as a unique marker for the detection of Salmonella enterica.}, } @article {pmid36479579, year = {2022}, author = {Droc, G and Martin, G and Guignon, V and Summo, M and Sempéré, G and Durant, E and Soriano, A and Baurens, FC and Cenci, A and Breton, C and Shah, T and Aury, JM and Ge, XJ and Harrison, PH and Yahiaoui, N and D'Hont, A and Rouard, M}, title = {The banana genome hub: a community database for genomics in the Musaceae.}, journal = {Horticulture research}, volume = {9}, number = {}, pages = {uhac221}, pmid = {36479579}, issn = {2662-6810}, abstract = {The Banana Genome Hub provides centralized access for genome assemblies, annotations, and the extensive related omics resources available for bananas and banana relatives. A series of tools and unique interfaces are implemented to harness the potential of genomics in bananas, leveraging the power of comparative analysis, while recognizing the differences between datasets. Besides effective genomic tools like BLAST and the JBrowse genome browser, additional interfaces enable advanced gene search and gene family analyses including multiple alignments and phylogenies. A synteny viewer enables the comparison of genome structures between chromosome-scale assemblies. Interfaces for differential expression analyses, metabolic pathways and GO enrichment were also added. 
A catalogue of variants spanning the banana diversity is made available for exploration, filtering, and export to a wide variety of software. Furthermore, we implemented new ways to graphically explore gene presence-absence in pangenomes as well as genome ancestry mosaics for cultivated bananas. Besides, to guide the community in future sequencing efforts, we provide recommendations for nomenclature of locus tags and a curated list of public genomic resources (assemblies, resequencing, high density genotyping) and upcoming resources-planned, ongoing or not yet public. The Banana Genome Hub aims at supporting the banana scientific community for basic, translational, and applied research and can be accessed at https://banana-genome-hub.southgreen.fr.}, } @article {pmid36478861, year = {2022}, author = {Abou Abdallah, R and Million, M and Delerce, J and Anani, H and Diop, A and Caputo, A and Zgheib, R and Rousset, E and Sidi Boumedine, K and Raoult, D and Fournier, PE}, title = {Pangenomic analysis of Coxiella burnetii unveils new traits in genome architecture.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1022356}, pmid = {36478861}, issn = {1664-302X}, abstract = {Coxiella burnetii is the etiological agent of Q fever, a worldwide zoonosis able to cause large outbreaks. The disease is polymorphic. Symptomatic primary infection is named acute Q fever and is associated with hepatitis, pneumonia, fever, and auto-immune complications while persistent focalized infections, mainly endocarditis, and vascular infections, occur in a minority of patients but are potentially lethal. In order to evaluate the genomic features, genetic diversity, evolution, as well as genetic determinants of antibiotic resistance, pathogenicity, and ability to cause outbreaks of Q fever, we performed a pangenomic analysis and genomic comparison of 75 C. burnetii strains including 63 newly sequenced genomes. Our analysis demonstrated that C. 
burnetii has an open pangenome, unique genes being found in many strains. In addition, pathogenicity islands were detected in all genomes. In consequence C. burnetii has a high genomic plasticity, higher than that of other intracellular bacteria. The core- and pan-genomes are made of 1,211 and 4,501 genes, respectively (ratio 0.27). The core gene-based phylogenetic analysis matched that obtained from multi-spacer typing and the distribution of plasmid types. Genomic characteristics were associated to clinical and epidemiological features. Some genotypes were associated to specific clinical forms and countries. MST1 genotype strains were associated to acute Q fever. A significant association was also found between clinical forms and plasmids. Strains harboring the QpRS plasmid were never found in acute Q fever and were only associated to persistent focalized infections. The QpDV and QpH1 plasmids were associated to acute Q fever. In addition, the Guyanese strain CB175, the most virulent strain to date, exhibited a unique MST genotype, a distinct COG profile and an important variation in gene number that may explain its unique pathogenesis. 
Therefore, strain-specific factors play an important role in determining the epidemiological and clinical manifestations of Q fever alongside with host-specific factors (valvular and vascular defects notably).}, } @article {pmid36476389, year = {2022}, author = {Djeghout, B and Bloomfield, SJ and Rudder, S and Elumogo, N and Mather, AE and Wain, J and Janecko, N}, title = {Comparative genomics of Campylobacter jejuni from clinical campylobacteriosis stool specimens.}, journal = {Gut pathogens}, volume = {14}, number = {1}, pages = {45}, pmid = {36476389}, issn = {1757-4749}, support = {BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/R012504/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, abstract = {BACKGROUND: Campylobacter jejuni is a pervasive pathogen of major public health concern with a complex ecology requiring accurate and informative approaches to define pathogen diversity during outbreak investigations. Source attribution analysis may be confounded if the genetic diversity of a C. jejuni population is not adequately captured in a single specimen. The aim of this study was to determine the genomic diversity of C. jejuni within individual stool specimens from four campylobacteriosis patients. Direct plating and pre-culture filtration of one stool specimen per patient was used to culture multiple isolates per stool specimen. Whole genome sequencing and pangenome level analysis were used to investigate genomic diversity of C. 
jejuni within a patient.

RESULTS: A total of 92 C. jejuni isolates were recovered from four patients presenting with gastroenteritis. The number of isolates ranged from 13 to 30 per patient stool. Three patients yielded a single C. jejuni multilocus sequence type: ST-21 (n = 26, patient 4), ST-61 (n = 30, patient 1) and ST-2066 (n = 23, patient 2). Patient 3 was infected with two different sequence types [ST-51 (n = 12) and ST-354 (n = 1)]. Isolates belonging to the same sequence type from the same patient specimen shared 12-43 core non-recombinant SNPs and 0-20 frameshifts with each other, and the pangenomes of each sequence type consisted of 1406-1491 core genes and 231-264 accessory genes. However, neither the mutation nor the accessory genes were connected to a specific functional gene category.

CONCLUSIONS: Our findings show that the C. jejuni population recovered from an individual patient's stool are genetically diverse even within the same ST and may have shared common ancestors before specimens were obtained. The population is unlikely to have evolved from a single isolate at the time point of initial patient infection, leading us to conclude that patients were likely infected with a heterogeneous C. jejuni population. The diversity of the C. jejuni population found within individual stool specimens can inform future methodological approaches to attribution and outbreak investigations.}, } @article {pmid36476074, year = {2022}, author = {Ullah, A and Ullah Khan, S and Haq, MU and Ahmad, S and Irfan, M and Asif, M and Muhseen, ZT and Alkeraidees, MS and Allemailem, KS and Alrumaihi, F and Almatroudi, A}, title = {Computational study to investigate Proteus mirabilis proteomes for multi-epitope vaccine construct design.}, journal = {Journal of biomolecular structure \& dynamics}, volume = {}, number = {}, pages = {1--12}, doi = {10.1080/07391102.2022.2153920}, pmid = {36476074}, issn = {1538-0254}, abstract = {Proteus mirabilis is a gram-negative bacterium particularly known for its unique swarming ability. The swarming gives the bacteria ability to enhance adherence to the catheter surface and epithelium cells of the urethra to cause catheter associated urinary tract infections. P. mirabilis has evolved resistant to antibiotics. Additionally, there is no approved vaccine against P. mirabilis, thus demanding for identification of new vaccine targets. This gram-negative bacterium consists of 19,502 core proteins, out of which 19,063 are redundant proteins and remaining 439 are non-redundant proteins. The non-redundant proteins have 21 proteins present on the cell surface out of which 11 proteins are virulent. Antigenicity analysis predicted only 2 proteins as antigenic (fimbrial biogenesis outer membrane usher protein and ligand-gated channel protein). 
Four and seven B-cells epitopes were predicted from the former and latter proteins, respectively. The predicted B-cells epitopes were used for T-cells epitopes prediction. The predicted epitopes were linked to each other through GPGPG linkers and joined with cholera toxin beta subunit adjuvant. A multi-epitopes vaccine construct consisting of 226 residues was docked with MHC-I, MHC-II and TLR-4. The best docked complex in each case has binding energy of -714.6, -744.6 and -829.5 kcal/mol, respectively. Moreover, the docking results were validated through molecular dynamics simulation and binding free energies estimation. The net energy of -137.2 kcal/mol was calculated for vaccine-MHC-I complex, -133.39 kcal/mol for vaccine-MHC-II and -158.68 kcal/mol for vaccine-TLR-4 complex. The designed vaccine construct could provoke immune responses against targeted pathogen and may be used in experimental testing. Communicated by Ramaswamy H. Sarma.}, } @article {pmid36474047, year = {2022}, author = {Wang, M and Li, J and Qi, Z and Long, Y and Pei, L and Huang, X and Grover, CE and Du, X and Xia, C and Wang, P and Liu, Z and You, J and Tian, X and Ma, Y and Wang, R and Chen, X and He, X and Fang, DD and Sun, Y and Tu, L and Jin, S and Zhu, L and Wendel, JF and Zhang, X}, title = {Genomic innovation and regulatory rewiring during evolution of the cotton genus Gossypium.}, journal = {Nature genetics}, volume = {54}, number = {12}, pages = {1959--1971}, pmid = {36474047}, issn = {1546-1718}, mesh = {*Gossypium/genetics ; *Genomics ; Chromatin ; }, abstract = {Phenotypic diversity and evolutionary innovation ultimately trace to variation in genomic sequence and rewiring of regulatory networks. Here, we constructed a pan-genome of the Gossypium genus using ten representative diploid genomes. We document the genomic evolutionary history and the impact of lineage-specific transposon amplification on differential genome composition. 
The pan-3D genome reveals evolutionary connections between transposon-driven genome size variation and both higher-order chromatin structure reorganization and the rewiring of chromatin interactome. We linked changes in chromatin structures to phenotypic differences in cotton fiber and identified regulatory variations that decode the genetic basis of fiber length, the latter enabled by sequencing 1,005 transcriptomes during fiber development. We showcase how pan-genomic, pan-3D genomic and genetic regulatory data serve as a resource for delineating the evolutionary basis of spinnable cotton fiber. Our work provides insights into the evolution of genome organization and regulation and will inform cotton improvement by enabling regulome-based approaches.}, } @article {pmid36469788, year = {2022}, author = {Yebra, G and Harling-Lee, JD and Lycett, S and Aarestrup, FM and Larsen, G and Cavaco, LM and Seo, KS and Abraham, S and Norris, JM and Schmidt, T and Ehlers, MM and Sordelli, DO and Buzzola, FR and Gebreyes, WA and Gonçalves, JL and Dos Santos, MV and Zakaria, Z and Rall, VLM and Keane, OM and Niedziela, DA and Paterson, GK and Holmes, MA and Freeman, TC and Fitzgerald, JR}, title = {Multiclonal human origin and global expansion of an endemic bacterial pathogen of livestock.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {119}, number = {50}, pages = {e2211217119}, pmid = {36469788}, issn = {1091-6490}, support = {201531/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Female ; Humans ; Cattle ; Animals ; *Staphylococcus aureus/genetics ; Livestock/genetics ; *Staphylococcal Infections/epidemiology/veterinary/genetics ; Genome ; Host Specificity ; }, abstract = {Most new pathogens of humans and animals arise via switching events from distinct host species. However, our understanding of the evolutionary and ecological drivers of successful host adaptation, expansion, and dissemination are limited. 
Staphylococcus aureus is a major bacterial pathogen of humans and a leading cause of mastitis in dairy cows worldwide. Here we trace the evolutionary history of bovine S. aureus using a global dataset of 10,254 S. aureus genomes including 1,896 bovine isolates from 32 countries in 6 continents. We identified 7 major contemporary endemic clones of S. aureus causing bovine mastitis around the world and traced them back to 4 independent host-jump events from humans that occurred up to 2,500 y ago. Individual clones emerged and underwent clonal expansion from the mid-19th to late 20th century coinciding with the commercialization and industrialization of dairy farming, and older lineages have become globally distributed via established cattle trade links. Importantly, we identified lineage-dependent differences in the frequency of host transmission events between humans and cows in both directions revealing high risk clones threatening veterinary and human health. Finally, pangenome network analysis revealed that some bovine S. aureus lineages contained distinct sets of bovine-associated genes, consistent with multiple trajectories to host adaptation via gene acquisition. 
Taken together, we have dissected the evolutionary history of a major endemic pathogen of livestock providing a comprehensive temporal, geographic, and gene-level perspective of its remarkable success.}, } @article {pmid36469554, year = {2022}, author = {Zhao, C and Goldman, M and Smith, BJ and Pollard, KS}, title = {Genotyping Microbial Communities with MIDAS2: From Metagenomic Reads to Allele Tables.}, journal = {Current protocols}, volume = {2}, number = {12}, pages = {e604}, pmid = {36469554}, issn = {2691-1299}, support = {R01 HL160862/HL/NHLBI NIH HHS/United States ; }, mesh = {*Metagenome/genetics ; Genotype ; Alleles ; *Microbiota/genetics ; Nucleotides ; }, abstract = {The Metagenomic Intra-Species Diversity Analysis System 2 (MIDAS2) is a scalable pipeline that identifies single nucleotide variants and gene copy number variants in metagenomes using comprehensive reference databases built from public microbial genome collections (metagenotyping). MIDAS2 is the first metagenotyping tool with functionality to control metagenomic read mapping filters and to customize the reference database to the microbial community, features that improve the precision and recall of detected variants. In this article we present four basic protocols for the most common use cases of MIDAS2, along with supporting protocols for installation and use. In addition, we provide in-depth guidance on adjusting command line parameters, editing the reference database, optimizing hardware utilization, and understanding the metagenotyping results. All the steps of metagenotyping, from raw sequencing reads to population genetic analysis, are demonstrated with example data in two downloadable sequencing libraries of single-end metagenomic reads representing a mixture of multiple bacterial species. 
This set of protocols empowers users to accurately genotype hundreds of species in thousands of samples, providing rich genetic data for studying the evolution and strain-level ecology of microbial communities. © 2022 The Authors. Current Protocols published by Wiley Periodicals LLC. Basic Protocol 1: Species prescreening Basic Protocol 2: Download MIDAS reference database Basic Protocol 3: Population single nucleotide variant calling Basic Protocol 4: Pan-genome copy number variant calling Support Protocol 1: Installing MIDAS2 Support Protocol 2: Command line inputs Support Protocol 3: Metagenotyping with a custom collection of genomes Support Protocol 4: Metagenotyping with advanced parameters.}, } @article {pmid36469480, year = {2022}, author = {Pais, AKL and Santos, LVSD and Albuquerque, GMR and Farias, ARG and Silva Junior, WJ and Balbino, VQ and Silva, AMF and Gama, MASD and Souza, EB}, title = {Comparative genomics and phylogenomics of the Ralstonia solanacearum Moko ecotype and its symptomatological variants.}, journal = {Genetics and molecular biology}, volume = {45}, number = {4}, pages = {e20220038}, pmid = {36469480}, issn = {1415-4757}, abstract = {Banana tree bacterial wilt is caused by the Ralstonia solanacearum Moko ecotype. These strains vary in their symptom progression in banana, and are classified as typical Moko variants (phylotype IIA and IIB strains from across Central and South America), Bugtok variant (Philippines), and Sergipe facies (the states of Sergipe and Alagoas, Brazil). This study used comparative genomic and phylogenomic approaches to identify a correlation between the symptom progression of the Moko ecotypes based on the analysis of 23 available genomes. Average nucleotide identity and in silico DNA-DNA hybridization revealed a high correlation (>96% and >78%, respectively) between the genomes of Moko variants. 
Pan-genome analysis identified 21.3% of inheritable regions between representatives of the typical Moko and Sergipe facies variants, which could be traced to an abundance of exclusive homolog clusters. Moko ecotype genomes shared 1,951 orthologous genes, but representatives with typical symptoms did not display unique orthologues. Moreover, Bugtok disease and Sergipe facies genomes did not share any unique genes, suggesting convergent evolution to a shared symptom progression. Overall, genomic and phylogenomic analyses were insufficient to differentiate the Moko variants based on symptom progression.}, } @article {pmid36467270, year = {2022}, author = {Lee, JH and Venkatesh, J and Jo, J and Jang, S and Kim, GW and Kim, JM and Han, K and Ro, N and Lee, HY and Kwon, JK and Kim, YM and Lee, TH and Choi, D and Van Deynze, A and Hill, T and Kfir, N and Freiman, A and Davila Olivas, NH and Elkind, Y and Paran, I and Kang, BC}, title = {High-quality chromosome-scale genomes facilitate effective identification of large structural variations in hot and sweet peppers.}, journal = {Horticulture research}, volume = {9}, number = {}, pages = {uhac210}, pmid = {36467270}, issn = {2662-6810}, abstract = {Pepper (Capsicum annuum) is an important vegetable crop that has been subjected to intensive breeding, resulting in limited genetic diversity, especially for sweet peppers. Previous studies have reported pepper draft genome assemblies using short read sequencing, but their capture of the extent of large structural variants (SVs), such as presence-absence variants (PAVs), inversions, and copy-number variants (CNVs) in the complex pepper genome falls short. In this study, we sequenced the genomes of representative sweet and hot pepper accessions by long-read and/or linked-read methods and advanced scaffolding technologies. 
First, we developed a high-quality reference genome for the sweet pepper cultivar 'Dempsey' and then used the reference genome to identify SVs in 11 other pepper accessions and constructed a graph-based pan-genome for pepper. We annotated an average of 42 972 gene families in each pepper accession, defining a set of 19 662 core and 23 115 non-core gene families. The new pepper pan-genome includes informative variants, 222 159 PAVs, 12 322 CNVs, and 16 032 inversions. Pan-genome analysis revealed PAVs associated with important agricultural traits, including potyvirus resistance, fruit color, pungency, and pepper fruit orientation. Comparatively, a large number of genes are affected by PAVs, which is positively correlated with the high frequency of transposable elements (TEs), indicating TEs play a key role in shaping the genomic landscape of peppers. The datasets presented herein provide a powerful new genomic resource for genetic analysis and genome-assisted breeding for pepper improvement.}, } @article {pmid36466678, year = {2022}, author = {Núñez-Montero, K and Rojas-Villalta, D and Barrientos, L}, title = {Antarctic Sphingomonas sp. So64.6b showed evolutive divergence within its genus, including new biosynthetic gene clusters.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1007225}, pmid = {36466678}, issn = {1664-302X}, abstract = {INTRODUCTION: The antibiotic crisis is a major human health problem. Bioprospecting screenings suggest that proteobacteria and other extremophile microorganisms have biosynthetic potential for the production novel antimicrobial compounds. An Antarctic Sphingomonas strain (So64.6b) previously showed interesting antibiotic activity and elicitation response, then a relationship between environmental adaptations and its biosynthetic potential was hypothesized. 
We aimed to determine the genomic characteristics in So64.6b strain related to evolutive traits for the adaptation to the Antarctic environment that could lead to its diversity of potentially novel antibiotic metabolites.

METHODS: The complete genome sequence of the Antarctic strain was obtained and mined for Biosynthetic Gene Clusters (BGCs) and other unique genes related to adaptation to extreme environments. Comparative genome analysis based on multi-locus phylogenomics, BGC phylogeny, and pangenomics were conducted within the closest genus, aiming to determine the taxonomic affiliation and differential characteristics of the Antarctic strain.

RESULTS AND DISCUSSION: The Antarctic strain So64.6b showed a closest identity with Sphingomonas alpina, however containing a significant genomic difference of ortholog cluster related to degradation multiple pollutants. Strain So64.6b had a total of six BGC, which were predicted with low to no similarity with other reported clusters; three were associated with potential novel antibiotic compounds using ARTS tool. Phylogenetic and synteny analysis of a common BGC showed great diversity between Sphingomonas genus but grouping in clades according to similar isolation environments, suggesting an evolution of BGCs that could be linked to the specific ecosystems. Comparative genomic analysis also showed that Sphingomonas species isolated from extreme environments had the greatest number of predicted BGCs and a higher percentage of genetic content devoted to BGCs than the isolates from mesophilic environments. In addition, some extreme-exclusive clusters were found related to oxidative and thermal stress adaptations, while pangenome analysis showed unique resistance genes on the Antarctic strain included in genetic islands. Altogether, our results showed the unique genetic content on Antarctic strain Sphingomonas sp. 
So64.6, -a probable new species of this genetically divergent genus-, which could have potentially novel antibiotic compounds acquired to cope with Antarctic poly-extreme conditions.}, } @article {pmid36466658, year = {2022}, author = {Jesus, HNR and Rocha, DJPG and Ramos, RTJ and Silva, A and Brenig, B and Góes-Neto, A and Costa, MM and Soares, SC and Azevedo, V and Aguiar, ERGR and Martínez-Martínez, L and Ocampo, A and Alibi, S and Dorta, A and Pacheco, LGC and Navas, J}, title = {Pan-genomic analysis of Corynebacterium amycolatum gives insights into molecular mechanisms underpinning the transition to a pathogenic phenotype.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1011578}, pmid = {36466658}, issn = {1664-302X}, abstract = {Corynebacterium amycolatum is a nonlipophilic coryneform which is increasingly being recognized as a relevant human and animal pathogen showing multidrug resistance to commonly used antibiotics. However, little is known about the molecular mechanisms involved in transition from colonization to the MDR invasive phenotype in clinical isolates. In this study, we performed a comprehensive pan-genomic analysis of C. amycolatum, including 26 isolates from different countries. We obtained the novel genome sequences of 8 of them, which are multidrug resistant clinical isolates from Spain and Tunisia. They were analyzed together with other 18 complete or draft C. amycolatum genomes retrieved from GenBank. The species C. amycolatum presented an open pan-genome (α = 0.854905), with 3,280 gene families, being 1,690 (51.52%) in the core genome, 1,121 related to accessory genes (34.17%), and 469 related to unique genes (14.29%). Although some classic corynebacterial virulence factors are absent in the species C. amycolatum, we did identify genes associated with immune evasion, toxin, and antiphagocytosis among the predicted putative virulence factors. 
Additionally, we found genomic evidence for extensive acquisition of antimicrobial resistance genes through genomic islands.}, } @article {pmid36466249, year = {2022}, author = {Park, J and Jung, H and Mannaa, M and Lee, SY and Lee, HH and Kim, N and Han, G and Park, DS and Lee, SW and Lee, SW and Seo, YS}, title = {Genome-guided comparative in planta transcriptome analyses for identifying cross-species common virulence factors in bacterial phytopathogens.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1030720}, pmid = {36466249}, issn = {1664-462X}, abstract = {Plant bacterial disease is a complex outcome achieved through a combination of virulence factors that are activated during infection. However, the common virulence factors across diverse plant pathogens are largely uncharacterized. Here, we established a pan-genome shared across the following plant pathogens: Burkholderia glumae, Ralstonia solanacearum, and Xanthomonas oryzae pv. oryzae. By overlaying in planta transcriptomes onto the pan-genome, we investigated the expression profiles of common genes during infection. We found over 70% of identical patterns for genes commonly expressed by the pathogens in different plant hosts or infection sites. Co-expression patterns revealed the activation of a signal transduction cascade to recognize and respond to external changes within hosts. Using mutagenesis, we uncovered a relationship between bacterial virulence and functions highly conserved and shared in the studied genomes of the bacterial phytopathogens, including flagellar biosynthesis protein, C4-dicarboxylate ABC transporter, 2-methylisocitrate lyase, and protocatechuate 3,4-dioxygenase (PCD). In particular, the disruption of PCD gene led to attenuated virulence in all pathogens and significantly affected phytotoxin production in B. glumae. This PCD gene was ubiquitously distributed in most plant pathogens with high homology. 
In conclusion, our results provide cross-species in planta models for identifying common virulence factors, which can be useful for the protection of crops against diverse pathogens.}, } @article {pmid36466237, year = {2022}, author = {Tirnaz, S and Zandberg, J and Thomas, WJW and Marsh, J and Edwards, D and Batley, J}, title = {Application of crop wild relatives in modern breeding: An overview of resources, experimental and computational methodologies.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1008904}, pmid = {36466237}, issn = {1664-462X}, abstract = {Global agricultural industries are under pressure to meet the future food demand; however, the existing crop genetic diversity might not be sufficient to meet this expectation. Advances in genome sequencing technologies and availability of reference genomes for over 300 plant species reveals the hidden genetic diversity in crop wild relatives (CWRs), which could have significant impacts in crop improvement. There are many ex-situ and in-situ resources around the world holding rare and valuable wild species, of which many carry agronomically important traits and it is crucial for users to be aware of their availability. Here we aim to explore the available ex-/in- situ resources such as genebanks, botanical gardens, national parks, conservation hotspots and inventories holding CWR accessions. In addition we highlight the advances in availability and use of CWR genomic resources, such as their contribution in pangenome construction and introducing novel genes into crops. We also discuss the potential and challenges of modern breeding experimental approaches (e.g. de novo domestication, genome editing and speed breeding) used in CWRs and the use of computational (e.g. 
machine learning) approaches that could speed up utilization of CWR species in breeding programs towards crop adaptability and yield improvement.}, } @article {pmid36466225, year = {2022}, author = {Ma, J and Wei, H and Yu, X and Lv, Y and Zhang, Y and Qian, Q and Shang, L and Guo, L}, title = {Compared analysis with a high-quality genome of weedy rice reveals the evolutionary game of de-domestication.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {1065449}, pmid = {36466225}, issn = {1664-462X}, abstract = {The weedy rice (Oryza sativa f. spontanea) harbors large numbers of excellent traits and genetic diversities, which serves as a valuable germplasm resource and has been considered as a typical material for research about de-domestication. However, there are relatively few reference genomes on weedy rice that severely limit exploiting these genetic resources and revealing more details about de-domestication events. In this study, a high-quality genome (~376.4 Mb) of weedy rice A02 was assembled based on Nanopore ultra-long platform with a coverage depth of about 79.3× and 35,423 genes were predicted. Compared to Nipponbare genome, 5,574 structural variations (SVs) were found in A02. Based on super pan-genome graph, population SVs of 238 weedy rice and cultivated rice accessions were identified using public resequencing data. Furthermore, the de-domestication sites of weedy rice and domestication sites of wild rice were analyzed and compared based on SVs and single-nucleotide polymorphisms (SNPs). Interestingly, an average of 2,198 genes about de-domestication could only be found by $F_{ST}$ analysis based on SVs (SV-$F_{ST}$) while not by $F_{ST}$ analysis based on SNPs (SNP-$F_{ST}$) in divergent region. Additionally, there was a low overlap between domestication and de-domestication intervals, which demonstrated that two different mechanisms existed in these events. 
Our finding could facilitate pinpointing of the evolutionary events that had shaped the genomic architecture of wild, cultivated, and weedy rice, and provide a good foundation for cloning of the superior alleles for breeding.}, } @article {pmid36461252, year = {2022}, author = {Xiang, X and Diao, E and Shang, Y and Song, M and He, Y}, title = {Rapid quantitative detection of Vibrio parahaemolyticus via high-fidelity target-based microfluidic identification.}, journal = {Food research international (Ottawa, Ont.)}, volume = {162}, number = {Pt A}, pages = {112032}, doi = {10.1016/j.foodres.2022.112032}, pmid = {36461252}, issn = {1873-7145}, mesh = {*Vibrio parahaemolyticus/genetics ; Microfluidics ; DNA Primers ; Excipients ; Food ; }, abstract = {With the rapid development of logistics, a growing number of pathogenic microorganisms has the means to spread worldwide using food as a carrier; thus, there is an urgent need to develop effective detection strategies to ensure food safety. By combining novel markers identified by pan-genome analysis and a digital recombinase-aided amplification (RAA) detection method based on a microfluidic chip, a strategy of high-fidelity target-based microfluidic identification (HFTMI) has been developed. Herein, a proof-of-concept study of HFTMI for rapid pathogen detection of V. parahaemolyticus was investigated. Specific primers designed for the gene group_41170 identified in the pan-genome analysis showed high sensitivity and a broad spectrum for the detection of V. parahaemolyticus. Different power systems were investigated to increase the partition rate on specifically designed chamber-based digital chips. The performance of HFTMI was greatly improved compared with qPCR. 
Collectively, this novel HFTMI system provides more reliable guidance for food safety testing.}, } @article {pmid36461065, year = {2022}, author = {Marone, MP and Singh, HC and Pozniak, CJ and Mascher, M}, title = {A technical guide to TRITEX, a computational pipeline for chromosome-scale sequence assembly of plant genomes.}, journal = {Plant methods}, volume = {18}, number = {1}, pages = {128}, pmid = {36461065}, issn = {1746-4811}, support = {SHAPE II, FKZ 031B0884//Bundesministerium für Bildung und Forschung/ ; ERC Starting Grant TRANSFER 949873//European Commission/ ; }, abstract = {BACKGROUND: As complete and accurate genome sequences are becoming easier to obtain, more researchers wish to get one or more of them to support their research endeavors. Reliable and well-documented sequence assembly workflows find use in reference or pangenome projects.

RESULTS: We describe modifications to the TRITEX genome assembly workflow motivated by the rise of fast and easy long-read contig assembly of inbred plant genomes and the routine deployment of the toolchains in pangenome projects. New features include the use as surrogates of or complements to dense genetic maps and the introduction of user-editable tables to make the curation of contig placements easier and more intuitive.

CONCLUSION: Even maximally contiguous sequence assemblies of the telomere-to-telomere sort, and to a yet greater extent, the fragmented kind require validation, correction, and comparison to reference standards. As pangenomics is burgeoning, these tasks are bound to become more widespread and TRITEX is one tool to get them done. This technical guide is supported by a step-by-step computational tutorial accessible under https://tritexassembly.bitbucket.io/ . The TRITEX source code is hosted under this URL: https://bitbucket.org/tritexassembly .}, } @article {pmid36454681, year = {2023}, author = {Prondzinsky, P and Toyoda, S and McGlynn, SE}, title = {The methanogen core and pangenome: conservation and variability across biology's growth temperature extremes.}, journal = {DNA research : an international journal for rapid publication of reports on genes and genomes}, volume = {30}, number = {1}, pages = {}, pmid = {36454681}, issn = {1756-1663}, support = {JPMJSP2106//JST SPRING/ ; }, mesh = {Temperature ; Phylogeny ; *Archaea/genetics/metabolism ; *Hot Temperature ; Genomics ; }, abstract = {Temperature is a key variable in biological processes. However, a complete understanding of biological temperature adaptation is lacking, in part because of the unique constraints among different evolutionary lineages and physiological groups. Here we compared the genomes of cultivated psychrotolerant and thermotolerant methanogens, which are physiologically related and span growth temperatures from -2.5°C to 122°C. Despite being phylogenetically distributed amongst three phyla in the archaea, the genomic core of cultivated methanogens comprises about one-third of a given genome, while the genome fraction shared by any two organisms decreases with increasing phylogenetic distance between them. 
Increased methanogenic growth temperature is associated with reduced genome size, and thermotolerant organisms—which are distributed across the archaeal tree—have larger core genome fractions, suggesting that genome size is governed by temperature rather than phylogeny. Thermotolerant methanogens are enriched in metal and other transporters, and psychrotolerant methanogens are enriched in proteins related to structure and motility. Observed amino acid compositional differences between temperature groups include proteome charge, polarity and unfolding entropy. Our results suggest that in the methanogens, shared physiology maintains a large, conserved genomic core even across large phylogenetic distances and biology's temperature extremes.}, } @article {pmid36454044, year = {2023}, author = {Pham, HM and Le, DT and Le, LT and Chu, PTM and Tran, LH and Pham, TT and Nguyen, HM and Luu, TT and Hoang, H and Chu, HH}, title = {A highly quality genome sequence of Penicillium oxalicum species isolated from the root of Ixora chinensis in Vietnam.}, journal = {G3 (Bethesda, Md.)}, volume = {13}, number = {2}, pages = {}, pmid = {36454044}, issn = {2160-1836}, mesh = {Phylogeny ; Vietnam ; *Genome ; *Penicillium/genetics/metabolism ; }, abstract = {Penicillium oxalicum has been reported as a multienzyme-producing fungus and is widely used in industry due to great potential for cellulase release. Until now, there are only 10 available genome assemblies of P. oxalicum species deposited in the GenBank database. In this study, the genome of the I1R1 strain isolated from the root of Ixora chinensis was completely sequenced by Pacbio Sequel sequencing technology, assembled into 8 chromosomes with the genome size of 30.8 Mb, as well as a mitogenome of 26 kb. The structural and functional analyses of the I1R1 genome revealed gene model annotations encoding an enzyme set involved in significant metabolic processes, along with cytochrome P450s and secondary metabolite biosynthesis. 
The comparative analysis of the P. oxalicum species based on orthology and gene family duplications indicated their large and closed pan-genome of 9,500 orthologous groups. This is valuable data for future phylogenetic and population genomics studies.}, } @article {pmid36453992, year = {2022}, author = {Rabanal, FA and Gräff, M and Lanz, C and Fritschi, K and Llaca, V and Lang, M and Carbonell-Bejerano, P and Henderson, I and Weigel, D}, title = {Pushing the limits of HiFi assemblies reveals centromere diversity between two Arabidopsis thaliana genomes.}, journal = {Nucleic acids research}, volume = {50}, number = {21}, pages = {12309-12327}, pmid = {36453992}, issn = {1362-4962}, mesh = {Sequence Analysis, DNA ; *Arabidopsis/genetics ; High-Throughput Nucleotide Sequencing ; Centromere/genetics ; DNA, Ribosomal ; }, abstract = {Although long-read sequencing can often enable chromosome-level reconstruction of genomes, it is still unclear how one can routinely obtain gapless assemblies. In the model plant Arabidopsis thaliana, other than the reference accession Col-0, all other accessions de novo assembled with long-reads until now have used PacBio continuous long reads (CLR). Although these assemblies sometimes achieved chromosome-arm level contigs, they inevitably broke near the centromeres, excluding megabases of DNA from analysis in pan-genome projects. Since PacBio high-fidelity (HiFi) reads circumvent the high error rate of CLR technologies, albeit at the expense of read length, we compared a CLR assembly of accession Eyach15-2 to HiFi assemblies of the same sample. The use of five different assemblers starting from subsampled data allowed us to evaluate the impact of coverage and read length. We found that centromeres and rDNA clusters are responsible for 71% of contig breaks in the CLR scaffolds, while relatively short stretches of GA/TC repeats are at the core of >85% of the unfilled gaps in our best HiFi assemblies. 
Since the HiFi technology consistently enabled us to reconstruct gapless centromeres and 5S rDNA clusters, we demonstrate the value of the approach by comparing these previously inaccessible regions of the genome between the Eyach15-2 accession and the reference accession Col-0.}, } @article {pmid36453910, year = {2022}, author = {Belloso Daza, MV and Almeida-Santos, AC and Novais, C and Read, A and Alves, V and Cocconcelli, PS and Freitas, AR and Peixe, L}, title = {Distinction between Enterococcus faecium and Enterococcus lactis by a gluP PCR-Based Assay for Accurate Identification and Diagnostics.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0326822}, pmid = {36453910}, issn = {2165-0497}, mesh = {Humans ; Anti-Bacterial Agents ; *Enterococcus faecium/genetics/isolation & purification ; Genome, Bacterial ; *Gram-Positive Bacterial Infections/diagnosis/microbiology ; *Polymerase Chain Reaction ; *Enterococcus/genetics/isolation & purification ; }, abstract = {It was recently proposed that Enterococcus faecium colonizing the human gut (previous clade B) actually corresponds to Enterococcus lactis. Our goals were to develop a PCR assay to rapidly differentiate these species and to discuss the main phenotypic and genotypic differences from a clinical perspective. The pan-genome of 512 genomes of E. faecium and E. lactis strains was analyzed to assess diversity in genes between the two species. Sequences were aligned to find the best candidate gene for designing species-specific primers, and their accuracy was tested with a collection of 382 enterococci. E. lactis isolates from clinical origins were further characterized by whole-genome sequencing (Illumina). Pan-genome analysis resulted in 12 gene variants, with gene gluP (rhomboid protease) being selected as the candidate for species differentiation. 
The nucleotide sequence of gluP diverged by 90 to 92% between sets, which allowed species identification through PCR with 100% specificity and no cross-reactivity. E. lactis strains were greatly pan-susceptible and not host specific. Hospital E. lactis isolates were susceptible to clinically relevant antibiotics, lacked infection-associated virulence markers, and were associated with patients presenting risk factors for enhanced bacterial translocation. Here, we propose a PCR-based assay using gluP for easy routine differentiation between E. faecium and E. lactis that could be implemented in different public health contexts. We further suggest that E. lactis, a dominant human gut species, can cross the gut barrier in severely ill, immunodeficient, and surgical patients. Knowing that bacterial translocation may be a sepsis promoter, the relevance of infections caused by E. lactis strains, even if they are pan-susceptible, should be explored. IMPORTANCE Enterococcus faecium is a WHO priority pathogen that causes severe and hard-to-treat human infections. It was recently proposed that E. faecium colonizing the human gut (previous clade B) actually corresponds to Enterococcus lactis; therefore, some of the human infections occurring globally are being misidentified. In this work, we developed a PCR-based rapid identification method for the differentiation of E. faecium and E. lactis and discussed the main phenotypic and genotypic differences of these species from a clinical perspective. We identified the gluP gene as the best candidate, based on the phylogenomic analysis of 512 published pan-genomes, and validated the PCR assay with a comprehensive collection of 382 enterococci obtained from different sources. Further detailed analysis of clinical E. lactis strains showed that they are highly susceptible to antibiotics and lack the typical virulence markers of E. 
faecium but are able to cause severe human infections in immunosuppressed patients, possibly in part due to gut barrier translocation.}, } @article {pmid36451103, year = {2022}, author = {Sarkar, S and Kamke, A and Ward, K and Hartung, E and Ran, Q and Feehan, B and Galliart, M and Jumpponen, A and Johnson, L and Lee, STM}, title = {Pseudomonas cultivated from Andropogon gerardii rhizosphere show functional potential for promoting plant host growth and drought resilience.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {784}, pmid = {36451103}, issn = {1471-2164}, support = {OIA-1656006//National Science Foundation/ ; 2020-67019-31803//National Institute of Food and Agriculture/ ; }, mesh = {*Andropogon ; Rhizosphere ; Droughts ; Pseudomonas ; Phylogeny ; *Poa ; Nitrogen ; Nitrate Reductases ; }, abstract = {BACKGROUND: Climate change will result in more frequent droughts that can impact soil-inhabiting microbiomes (rhizobiomes) in the agriculturally vital North American perennial grasslands. Rhizobiomes have contributed to enhancing drought resilience and stress resistance properties in plant hosts. In the predicted events of more future droughts, how the changing rhizobiome under environmental stress can impact the plant host resilience needs to be deciphered. There is also an urgent need to identify and recover candidate microorganisms along with their functions, involved in enhancing plant resilience, enabling the successful development of synthetic communities.

RESULTS: In this study, we used the combination of cultivation and high-resolution genomic sequencing of bacterial communities recovered from the rhizosphere of a tallgrass prairie foundation grass, Andropogon gerardii. We cultivated the plant host-associated microbes under artificial drought-induced conditions and identified the microbe(s) that might play a significant role in the rhizobiome of Andropogon gerardii under drought conditions. Phylogenetic analysis of the non-redundant metagenome-assembled genomes (MAGs) identified a bacterial genome of interest - MAG-Pseudomonas. Further metabolic pathway and pangenome analyses recovered genes and pathways related to stress responses including ACC deaminase; nitrogen transformation including assimilatory nitrate reductase in MAG-Pseudomonas, which might be associated with enhanced drought tolerance and growth for Andropogon gerardii.

CONCLUSIONS: Our data indicated that the metagenome-assembled MAG-Pseudomonas has the functional potential to contribute to the plant host's growth during stressful conditions. Our study also suggested the nitrogen transformation potential of MAG-Pseudomonas that could impact Andropogon gerardii growth in a positive way. The cultivation of MAG-Pseudomonas sets the foundation to construct a successful synthetic community for Andropogon gerardii. To conclude, stress resilience mediated through genes ACC deaminase, nitrogen transformation potential through assimilatory nitrate reductase in MAG-Pseudomonas could place this microorganism as an important candidate of the rhizobiome aiding the plant host resilience under environmental stress. This study, therefore, provided insights into the MAG-Pseudomonas and its potential to optimize plant productivity under ever-changing climatic patterns, especially in frequent drought conditions.}, } @article {pmid36449159, year = {2023}, author = {Groza, C and Bourque, G and Goubert, C}, title = {A Pangenome Approach to Detect and Genotype TE Insertion Polymorphisms.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2607}, number = {}, pages = {85-94}, pmid = {36449159}, issn = {1940-6029}, mesh = {Humans ; *DNA Transposable Elements/genetics ; Genotype ; *Polymorphism, Genetic ; Haplotypes ; Genome, Human ; }, abstract = {Pangenome graphs are flexible data structures that contain the genetic variation that exists in a population of genomes and describe the sequences of the many possible ensuing haplotypes. Here, we use such a pangenome graph to represent and genotype transposable element (TE) polymorphisms. 
By combining the transposable element annotation (Alus, L1s, and SVAs) of the human genome reference with novel transposable element insertions observed in two high-quality assemblies (HG002 and HG00733), we show how to create a transposable element pangenome that consists of ~1.2 million reference and 2939 non-reference transposable elements. We then demonstrate this approach by aligning short-read sequencing data and genotyping transposable element deletions and insertions with reasonable specificity and sensitivity (0.85 F1-score).}, } @article {pmid36448683, year = {2023}, author = {Garrison, E and Guarracino, A}, title = {Unbiased pangenome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {39}, number = {1}, pages = {}, pmid = {36448683}, issn = {1367-4811}, support = {U01 DA047638/DA/NIDA NIH HHS/United States ; //NIH/ ; //Human Technopole in Milan/ ; #2118709//NSF PPoSS/ ; }, mesh = {Sequence Analysis, DNA ; *Algorithms ; *Software ; Genome ; Documentation ; }, abstract = {MOTIVATION: Pangenome variation graphs model the mutual alignment of collections of DNA sequences. A set of pairwise alignments implies a variation graph, but there are no scalable methods to generate such a graph from these alignments. Existing related approaches depend on a single reference, a specific ordering of genomes or a de Bruijn model based on a fixed k-mer length. A scalable, self-contained method to build pangenome graphs without such limitations would be a key step in pangenome construction and manipulation pipelines.

RESULTS: We design the seqwish algorithm, which builds a variation graph from a set of sequences and alignments between them. We first transform the alignment set into an implicit interval tree. To build up the variation graph, we query this tree-based representation of the alignments to reduce transitive matches into single DNA segments in a sequence graph. By recording the mapping from input sequence to output graph, we can trace the original paths through this graph, yielding a pangenome variation graph. We present an implementation that operates in external memory, using disk-backed data structures and lock-free parallel methods to drive the core graph induction step. We demonstrate that our method scales to very large graph induction problems by applying it to build pangenome graphs for several species.

seqwish is published as free software under the MIT open source license. Source code and documentation are available at https://github.com/ekg/seqwish. seqwish can be installed via Bioconda https://bioconda.github.io/recipes/seqwish/README.html or GNU Guix https://github.com/ekg/guix-genomics/blob/master/seqwish.scm.}, } @article {pmid36447475, year = {2022}, author = {Moniruzzaman, M and Erazo-Garcia, MP and Aylward, FO}, title = {Endogenous giant viruses contribute to intraspecies genomic variability in the model green alga Chlamydomonas reinhardtii.}, journal = {Virus evolution}, volume = {8}, number = {2}, pages = {veac102}, pmid = {36447475}, issn = {2057-1577}, support = {R35 GM147290/GM/NIGMS NIH HHS/United States ; }, abstract = {Chlamydomonas reinhardtii is a unicellular eukaryotic alga that has been studied as a model organism for decades. Despite an extensive history as a model system, phylogenetic and genetic characteristics of viruses infecting this alga have remained elusive. We analyzed high-throughput genome sequence data of C. reinhardtii field isolates, and in six we discovered sequences belonging to endogenous giant viruses that reach up to several 100 kb in length. In addition, we have also discovered the entire genome of a closely related giant virus that is endogenized within the genome of Chlamydomonas incerta, the closest sequenced relative of C. reinhardtii. Endogenous giant viruses add hundreds of new gene families to the host strains, highlighting their contribution to the pangenome dynamics and interstrain genomic variability of C. reinhardtii. 
Our findings suggest that the endogenization of giant viruses may have important implications for structuring the population dynamics and ecology of protists in the environment.}, } @article {pmid36445094, year = {2022}, author = {Yu, Y and Cheng, W and Chen, X and Guo, Q and Cao, H}, title = {Cyanobacterial Blooms Are Not a Result of Positive Selection by Freshwater Eutrophication.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0319422}, pmid = {36445094}, issn = {2165-0497}, mesh = {Phylogeny ; *Lakes/microbiology ; *Cyanobacteria/genetics ; Harmful Algal Bloom ; Water ; }, abstract = {Long-standing cyanobacterial harmful algal blooms (CyanoHABs) are known to result from synergistic interaction between elevated nutrients and superior ecophysiology of cyanobacteria. However, it remains to be determined whether CyanoHABs are a result of positive selection by eutrophic waters. To address this, we conducted molecular evolutionary analyses on the genomes of 9 bloom-forming cyanobacteria, combined with pangenomics and metatranscriptomics. The results showed no positive selection by water eutrophication. Instead, all homologous genes in the species are under strong purifying selection based on the ratio of divergence at nonsynonymous and synonymous sites (dN/dS) and phylogeny. The dN/dS < 0.85 (median = 0.3) for all homologous genes are similar between the genes in the pathways driving CyanoHABs and housekeeping functions. Phylogenetic support for non-positive selection comes from the mixed clustering of strains: strains of the same species from diverse geographic origins form the same clusters, while strains from the same origins form different clusters. Further support lies in the codon adaptation index (CAI) and single nucleotide polymorphism (SNP). The CAI ranged from 0.42 to 0.9 (mean = 0.75), which indicates high-level codon usage bias; the pathways for CyanoHABs and housekeeping functions showed a similar CAI. 
Interestingly, CAI was negatively correlated with gene expression in 3 metatranscriptomes. The numbers of SNPs were concentrated around 5 to 50. As the SNP number increases, the gene expression level decreases. These negative correlations agree with the population-level dN/dS and phylogeny in supporting purifying selection in bloom-forming cyanobacteria. In summary, superior ecophysiology appears to be acquired prior to water eutrophication. IMPORTANCE CyanoHABs are global environmental hazards, and their mechanisms of action are being intensively investigated. On an ecological scale, CyanoHABs are consequences of synergistic interactions between biological functions and elevated nutrients in eutrophic waters. On an evolutionary scale, one important question is how bloom-forming cyanobacteria acquire these superior biological functions. There are several possibilities, including adaptive evolution and horizontal gene transfer. Here, we explored the possibility of positive selection. We reasoned that there are two possible periods for cyanobacteria to acquire these functions: before the onset of water eutrophication or during water eutrophication. Either way, there should be molecular signatures in protein sequences for positive selection. 
Interestingly, we found no positive selection by water eutrophication, but strong purifying selection instead on nearly all the genes, suggesting these superior functions aiding CyanoHABs are acquired prior to water eutrophication.}, } @article {pmid36445082, year = {2022}, author = {Cheng, S and Fleres, G and Chen, L and Liu, G and Hao, B and Newbrough, A and Driscoll, E and Shields, RK and Squires, KM and Chu, TY and Kreiswirth, BN and Nguyen, MH and Clancy, CJ}, title = {Within-Host Genotypic and Phenotypic Diversity of Contemporaneous Carbapenem-Resistant Klebsiella pneumoniae from Blood Cultures of Patients with Bacteremia.}, journal = {mBio}, volume = {13}, number = {6}, pages = {e0290622}, pmid = {36445082}, issn = {2150-7511}, mesh = {Animals ; Mice ; Klebsiella pneumoniae/genetics ; Blood Culture ; Anti-Bacterial Agents/therapeutic use ; Carbapenems ; *Carbapenem-Resistant Enterobacteriaceae/genetics ; *Bacteremia/microbiology ; *Sepsis/drug therapy ; *Klebsiella Infections/microbiology ; Microbial Sensitivity Tests ; beta-Lactamases ; }, abstract = {It is unknown whether bacterial bloodstream infections (BSIs) are commonly caused by single organisms or mixed microbial populations. We hypothesized that contemporaneous carbapenem-resistant Klebsiella pneumoniae (CRKP) strains from blood cultures of individual patients are genetically and phenotypically distinct. We determined short-read whole-genome sequences of 10 sequence type 258 (ST258) CRKP strains from blood cultures in each of 6 patients (Illumina HiSeq). Strains clustered by patient by core genome and pan-genome phylogeny. In 5 patients, there was within-host strain diversity by gene mutations, presence/absence of antibiotic resistance or virulence genes, and/or plasmid content. Accessory gene phylogeny revealed strain diversity in all 6 patients. Strains from 3 patients underwent long-read sequencing for genome completion (Oxford Nanopore) and phenotypic testing. 
Genetically distinct strains within individuals exhibited significant differences in carbapenem and other antibiotic responses, capsular polysaccharide (CPS) production, mucoviscosity, and/or serum killing. In 2 patients, strains differed significantly in virulence during mouse BSIs. Genetic or phenotypic diversity was not observed among strains recovered from blood culture bottles seeded with index strains from the 3 patients and incubated in vitro at 37°C. In conclusion, we identified genotypic and phenotypic variant ST258 CRKP strains from blood cultures of individual patients with BSIs, which were not detected by the clinical laboratory or in seeded blood cultures. The data suggest a new paradigm of CRKP population diversity during BSIs, at least in some patients. If validated for BSIs caused by other bacteria, within-host microbial diversity may have implications for medical, microbiology, and infection prevention practices and for understanding antibiotic resistance and pathogenesis. IMPORTANCE The long-standing paradigm for pathogenesis of bacteremia is that, in most cases, a single organism passes through a bottleneck and establishes itself in the bloodstream (single-organism hypothesis). In keeping with this paradigm, standard practice in processing positive microbiologic cultures is to test single bacterial strains from morphologically distinct colonies. This study is the first genome-wide analysis of within-host diversity of Klebsiella pneumoniae strains recovered from individual patients with bloodstream infections (BSIs). Our finding that positive blood cultures comprised genetically and phenotypically heterogeneous carbapenem-resistant K. pneumoniae strains challenges the single-organism hypothesis and suggests that at least some BSIs are caused by mixed bacterial populations that are unrecognized by the clinical laboratory. 
The data support a model of pathogenesis in which pressures in vivo select for strain variants with particular antibiotic resistance or virulence attributes and raise questions about laboratory protocols and treatment decisions directed against single strains.}, } @article {pmid36445077, year = {2022}, author = {Conde, C and Thézé, J and Cochard, T and Rossignol, MN and Fourichon, C and Delafosse, A and Joly, A and Guatteo, R and Schibler, L and Bannantine, JP and Biet, F}, title = {Genetic Features of Mycobacterium avium subsp. paratuberculosis Strains Circulating in the West of France Deciphered by Whole-Genome Sequencing.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0339222}, pmid = {36445077}, issn = {2165-0497}, mesh = {Animals ; Cattle ; *Mycobacterium avium subsp. paratuberculosis/genetics ; *Paratuberculosis/epidemiology/microbiology ; Phylogeny ; Longitudinal Studies ; Ruminants ; }, abstract = {Paratuberculosis is a chronic infection of the intestine, mainly the ileum, caused by Mycobacterium avium subsp. paratuberculosis in cattle and other ruminants. This enzootic disease is present worldwide and has a negative impact on the dairy cattle industry. For this subspecies, the current genotyping tools do not provide the needed resolution to investigate the genetic diversity of closely related strains. These limitations can be overcome by the application of whole-genome sequencing (WGS), particularly for clonal populations such as M. avium subsp. paratuberculosis. The purpose of the present study was to undertake a WGS analysis with a panel of 200 animal field M. avium subsp. paratuberculosis strains selected based on a previous large-scale longitudinal study of Prim'Holstein and Normande dairy breeds naturally infected with M. avium subsp. paratuberculosis in the West of France. The pangenome analysis revealed that M. avium subsp. paratuberculosis has a closed pangenome. 
The phylogeny, based on alignment of 2,786 nonhomoplasic single nucleotide polymorphisms (SNPs), showed that the strain population is structured into three clades independently of the cattle breed or geographic distribution. The increased resolution of phylogeny obtained by WGS confirmed the homoplasic nature of the markers variable-number tandem repeat (VNTR) and short sequence repeat (SSR) used for M. avium subsp. paratuberculosis genotyping. These phylogenetic data also revealed independent introductions of the different genotypes in two main waves since at least 2003. WGS applied to this sampling demonstrated the presence of mixed infections in herds and at the individual animal level. Collectively, the phylogeny results inferred with French isolates compared to M. avium subsp. paratuberculosis isolates from around the world suggest introductions of M. avium subsp. paratuberculosis genotypes through the animal trade. Relationships between genetic traits and epidemiological data can now be investigated to better understand transmission dynamics of the disease. IMPORTANCE Mycobacterium avium subsp. paratuberculosis causes Johne's disease in ruminants, which is present worldwide and has significant negative impacts on the dairy cattle industry and animal welfare. Prevention and control of M. avium subsp. paratuberculosis infection are hampered by knowledge gaps in strain virulence, genotype distribution, and transmission dynamics. This work has revealed new insights into M. avium subsp. paratuberculosis strains currently circulating in western France and how they are related to strains circulating globally. We applied whole-genome sequencing (WGS) to obtain comprehensive information on genome evolution and discrimination of closely related strains. This approach revealed the history of M. avium subsp. paratuberculosis infection in France, refined the pangenomic characteristics of M. avium subsp. 
paratuberculosis, and demonstrated the existence of mixed infection in animals. Finally, this study identified predominant genotypes, which allow a better understanding of disease transmission dynamics. This information will facilitate tracking of this pathogen on farms and across agricultural regions, thus informing transmission pathways and disease control points.}, } @article {pmid36437921, year = {2022}, author = {Singh, V and Pandey, S and Bhardwaj, A}, title = {From the reference human genome to human pangenome: Premise, promise and challenge.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {1042550}, pmid = {36437921}, issn = {1664-8021}, abstract = {The Reference Human Genome remains the single most important resource for mapping genetic variations and assessing their impact. However, it is monophasic, incomplete and not representative of the variation that exists in the population. Given the extent of ethno-geographic diversity and the consequent diversity in clinical manifestations of these variations, population specific references were developed overtime. The dramatically plummeting cost of sequencing whole genomes and the advent of third generation long range sequencers allowing accurate, error free, telomere-to-telomere assemblies of human genomes present us with a unique and unprecedented opportunity to develop a more composite standard reference consisting of a collection of multiple genomes that capture the maximal variation existing in the population, with the deepest annotation possible, enabling a realistic, reliable and actionable estimation of clinical significance of specific variations. The Human Pangenome Project thus is a logical next step promising a more accurate and global representation of genomic variations. The pangenome effort must be reciprocally complemented with precise variant discovery tools and exhaustive annotation to ensure unambiguous clinical assessment of the variant in ethno-geographical context. 
Here we discuss a broad roadmap, the challenges and way forward in developing a universal pangenome reference including data visualization techniques and integration of prior knowledge base in the new graph based architecture and tools to submit, compare, query, annotate and retrieve relevant information from the pangenomes. The biggest challenge, however, will be the ethical, legal and social implications and the training of human resource to the new reference paradigm.}, } @article {pmid36436132, year = {2022}, author = {Zoaiter, M and Magdy Wasfy, R and Caputo, A and Fenollar, F and Zeaiter, Z and Fournier, PE and Houhamdi, L}, title = {Streptococcus bouchesdurhonensis sp. nov. isolated from a bronchoalveolar lavage of a patient with pneumonia.}, journal = {Archives of microbiology}, volume = {205}, number = {1}, pages = {3}, pmid = {36436132}, issn = {1432-072X}, mesh = {Humans ; Aged ; RNA, Ribosomal, 16S/genetics ; Phylogeny ; *Genome, Bacterial ; DNA, Bacterial/genetics ; Streptococcus/genetics ; Bronchoalveolar Lavage ; *Pneumonia/genetics ; }, abstract = {Strain Marseille-Q6994 was isolated from a 72-year-old patient with pneumonia from Bouches-du-Rhône department, in France. Cells were Gram positive, non-motile, catalase and oxidase-negative cocci. The major fatty acids were hexadecanoic (47.4%) and tetradecanoic acids (28.3%). 16S rRNA gene sequence comparison suggested that strain Marseille-Q6994 was affiliated to the Streptococcus genus. GroEL phylogenetic analysis separated strain Marseille-Q6994 in a distinct branch from the closely related Streptococcus-type strains with standing in nomenclature. Whole genome sequencing-based methods (OrthoAverage Nucleotide Identity, digital DNA-DNA hybridization and pangenome analysis) supported the classification of the strain into a novel species. Therefore, based on the phenotypic, genomic, and phylogenetic analyses, we propose the name Streptococcus bouchesdurhonensis sp. 
nov for which strain Marseille-Q6994[T] (CSUR Marseille-Q6994 = DSMZ 113892) is the type strain.}, } @article {pmid36432770, year = {2022}, author = {Jha, UC and Nayyar, H and von Wettberg, EJB and Naik, YD and Thudi, M and Siddique, KHM}, title = {Legume Pangenome: Status and Scope for Crop Improvement.}, journal = {Plants (Basel, Switzerland)}, volume = {11}, number = {22}, pages = {}, pmid = {36432770}, issn = {2223-7747}, abstract = {In the last decade, legume genomics research has seen a paradigm shift due to advances in genome sequencing technologies, assembly algorithms, and computational genomics that enabled the construction of high-quality reference genome assemblies of major legume crops. These advances have certainly facilitated the identification of novel genetic variants underlying the traits of agronomic importance in many legume crops. Furthermore, these robust sequencing technologies have allowed us to study structural variations across the whole genome in multiple individuals and at the species level using 'pangenome analysis.' 
This review updates the progress of constructing pangenome assemblies for various legume crops and discusses the prospects for these pangenomes and how to harness the information to improve various traits of economic importance through molecular breeding to increase genetic gain in legumes and tackle the increasing global food crisis.}, } @article {pmid36429532, year = {2022}, author = {Almuhayawi, MS and Al Jaouni, SK and Selim, S and Alkhalifah, DHM and Marc, RA and Aslam, S and Poczai, P}, title = {Integrated Pangenome Analysis and Pharmacophore Modeling Revealed Potential Novel Inhibitors against Enterobacter xiangfangensis.}, journal = {International journal of environmental research and public health}, volume = {19}, number = {22}, pages = {}, pmid = {36429532}, issn = {1660-4601}, mesh = {*Bacterial Proteins/genetics/metabolism ; *Enterobacter/genetics/metabolism ; Genome, Bacterial ; Uridine Diphosphate ; }, abstract = {Enterobacter xiangfangensis is a novel, multidrug-resistant pathogen belonging to the Enterobacter genus and has the ability to acquire resistance to multiple antibiotic classes. However, there is currently no registered E. xiangfangensis drug on the market that has been shown to be effective. Hence, there is an urgent need to identify novel therapeutic targets and effective treatments for E. xiangfangensis. In the current study, a bacterial pan genome analysis and subtractive proteomics approach was employed to the core proteomes of six strains of E. xiangfangensis using several bioinformatic tools, software, and servers. However, 2611 nonredundant proteins were predicted from the 21,720 core proteins of core proteome. Out of 2611 nonredundant proteins, 372 were obtained from Geptop2.0 as essential proteins. After the subtractive proteomics and subcellular localization analysis, only 133 proteins were found in cytoplasm. 
All cytoplasmic proteins were examined using BLASTp against the virulence factor database, which classifies 20 therapeutic targets as virulent. Out of these 20, 3 cytoplasmic proteins: ferric iron uptake transcriptional regulator (FUR), UDP-2,3diacylglucosamine diphosphatase (UDP), and lipid-A-disaccharide synthase (lpxB) were chosen as potential drug targets. These drug targets are important for bacterial survival, virulence, and growth and could be used as therapeutic targets. More than 2500 plant chemicals were used to molecularly dock these proteins. Furthermore, the lowest-binding energetic docked compounds were found. The top five hit compounds, Adenine, Mollugin, Xanthohumol C, Sakuranetin, and Toosendanin demonstrated optimum binding against all three target proteins. Furthermore, molecular dynamics simulations and MM/GBSA analyses validated the stability of ligand-protein complexes and revealed that these compounds could serve as potential E. xiangfangensis replication inhibitors. Consequently, this study marks a significant step forward in the creation of new and powerful drugs against E. xiangfangensis. Future studies should validate these targets experimentally to prove their function in E. xiangfangensis survival and virulence.}, } @article {pmid36427110, year = {2022}, author = {González-Castillo, A and Carballo, JL and Bautista-Guerrero, E}, title = {Genomics, Phylogeny, and in Silico Phenotyping of Nitrosopumilus Genus.}, journal = {Current microbiology}, volume = {80}, number = {1}, pages = {3}, pmid = {36427110}, issn = {1432-0991}, support = {254806//CONACYT-SEP/ ; }, mesh = {Animals ; Phylogeny ; *Genomics ; Archaea ; *Porifera ; Multilocus Sequence Typing ; }, abstract = {The present study reports the first genome of Nitrosopumilus extracted from the marine sponge Thoosa mismalolli. The genomic study of Nitrosopumilus genus using seven genomes type strains (N. maritimus, N. piranensis, N. zosterae, N. ureiphilus, N. adriaticus, N. 
oxyclinae and N. cobalaminigenes), four genomes Candidatus species (Ca. N. koreensis, Ca. N. sp. AR2, Ca. N. salaria BD31, and SZUA-335), and six reference genomes (SI075, SI0036, SI0060, SI0034, SI0048, and bin36o) isolated from marine sponge, a tropical marine fish tank, dimly lit deep coastal waters, the lower euphotic zone of coastal waters, near-surface sediment, and MAG N. sp NMAG03 isolated from Thoosa mismalolli was performed. These genomes were characterized by means of a polyphasic approach comprising multilocus sequence analysis (MLSA) of 139 single-copy genes (SCG), core-pangenome, ANI, and in silico phenotypic characterization. We found that the genomes of the Nitrosopumilus genus formed three separate clusters (A, B, and C) based in 139 SCG sequence similarity. The genomes showed values between 75.2 and 99.5% for ANI, the core genome consisted of 168 gene families and the pangenome of 6,011 gene families. Based on the genomic analyses performed, the cluster A may contain a potential new species (NMAG03), and the cluster C could be represented by three new species of the genus. Finally, based on the results shown in this polyphasic approach, we support the use of the integrated approach for genomic analysis of poorly studied genera.}, } @article {pmid36425027, year = {2022}, author = {Gtari, M}, title = {Taxogenomic status of phylogenetically distant Frankia clusters warrants their elevation to the rank of genus: A description of Protofrankia gen. nov., Parafrankia gen. nov., and Pseudofrankia gen. nov. as three novel genera within the family Frankiaceae.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1041425}, pmid = {36425027}, issn = {1664-302X}, abstract = {The genus Frankia is at present the sole genus in the family Frankiaceae and encompasses filamentous, sporangia-forming actinomycetes principally isolated from root nodules of taxonomically disparate dicotyledonous hosts named actinorhizal plants. 
Multiple independent phylogenetic analyses agree with the division of the genus Frankia into four well-supported clusters. Within these clusters, Frankia strains are well defined based on host infectivity range, mode of infection, morphology, and their behaviour in culture. In this study, phylogenomics, overall genome related indices (OGRI), together with available data sets for phenotypic and host-plant ranges available for the type strains of Frankia species, were considered. The robustness and the deep radiation observed in Frankia at the subgeneric level, fulfilling the primary principle of phylogenetic systematics, were strengthened by establishing genome criteria for new genus demarcation boundaries. Therefore, the taxonomic elevation of the Frankia clusters to the rank of the genus is proposed. The genus Frankia should be revised to encompass cluster 1 species only and three novel genera, Protofrankia gen. nov., Parafrankia gen. nov., and Pseudofrankia gen. nov., are proposed to accommodate clusters 2, 3, and 4 species, respectively. New combinations for validly named species are also provided.}, } @article {pmid36423113, year = {2022}, author = {Swetha, RG and Basu, S and Ramaiah, S and Anbarasu, A}, title = {Multi-Epitope Vaccine for Monkeypox Using Pan-Genome and Reverse Vaccinology Approaches.}, journal = {Viruses}, volume = {14}, number = {11}, pages = {}, pmid = {36423113}, issn = {1999-4915}, mesh = {Child ; Humans ; Vaccinology ; *Monkeypox ; Molecular Docking Simulation ; Epitopes, B-Lymphocyte ; *Vaccines ; }, abstract = {Outbreaks of monkeypox virus infections have imposed major health concerns worldwide, with high morbidity threats to children and immunocompromised adults. Although repurposed drugs and vaccines are being used to curb the disease, the evolving traits of the virus, exhibiting considerable genetic dynamicity, challenge the limits of a targeted treatment. 
A pan-genome-based reverse vaccinology approach can provide fast and efficient solutions to resolve persistent inconveniences in experimental vaccine design during an outbreak-exigency. The approach encompassed screening of available monkeypox whole genomes (n = 910) to identify viral targets. From 102 screened viral targets, viral proteins L5L, A28, and L5 were finalized based on their location, solubility, and antigenicity. The potential T-cell and B-cell epitopes were extracted from the proteins using immunoinformatics tools and algorithms. Multiple vaccine constructs were designed by combining the epitopes. Based on immunological properties, chemical stability, and structural quality, a novel multi-epitopic vaccine construct, V4, was finalized. Flexible-docking and coarse-dynamics simulation portrayed that the V4 had high binding affinity towards human HLA-proteins (binding energy < -15.0 kcal/mol) with low conformational fluctuations (<1 Å). Thus, the vaccine construct (V4) may act as an efficient vaccine to induce immunity against monkeypox, which encourages experimental validation and similar approaches against emerging viral infections.}, } @article {pmid36421834, year = {2022}, author = {Jalil, M and Quddos, F and Anwer, F and Nasir, S and Rahman, A and Alharbi, M and Alshammari, A and Alshammari, HK and Ali, A}, title = {Comparative Pan-Genomic Analysis Revealed an Improved Multi-Locus Sequence Typing Scheme for Staphylococcus aureus.}, journal = {Genes}, volume = {13}, number = {11}, pages = {}, pmid = {36421834}, issn = {2073-4425}, mesh = {Humans ; Multilocus Sequence Typing/methods ; Staphylococcus aureus/genetics ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Phylogeny ; *Staphylococcal Infections/epidemiology ; Genomics ; }, abstract = {The growing prevalence of antibiotic-resistant Staphylococcus aureus strains mandates selective susceptibility testing and epidemiological investigations. 
It also draws attention to an efficient typing strategy. Whole genome sequencing helps in genetic comparison, strain differentiation, and typing; however, it is not that cost-effective. In comparison, Multi-Locus Sequence Typing (MLST) is an efficient typing method employed for bacterial strain typing and characterizations. In this paper, a comprehensive pangenome and phylogenetic analysis of 502/1279 S. aureus genomes is carried out to understand the species divergence. Additionally, the current Multi-Locus Sequence Typing (MLST) scheme was evaluated, and genes were excluded or substituted by alternative genes based on reported shortcomings, genomic data, and statistical scores calculated. The data generated were helpful in devising a new Multi-Locus Sequence Typing (MLST) scheme for the efficient typing of S. aureus strains. The revised scheme is now a blend of previously used genes and new candidate genes. The genes yQil, aroE, and gmk are replaced with better gene candidates, opuCC, aspS, and rpiB, based on their genome localization, representation, and statistical scores. 
Therefore, the proposed Multi-Locus Sequence Typing (MLST) method offers a greater resolution with 58 sequence types (STs) in comparison to the prior scheme's 42 STs.}, } @article {pmid36420896, year = {2023}, author = {Frankish, A and Carbonell-Sala, S and Diekhans, M and Jungreis, I and Loveland, JE and Mudge, JM and Sisu, C and Wright, JC and Arnan, C and Barnes, I and Banerjee, A and Bennett, R and Berry, A and Bignell, A and Boix, C and Calvet, F and Cerdán-Vélez, D and Cunningham, F and Davidson, C and Donaldson, S and Dursun, C and Fatima, R and Giorgetti, S and Giron, CG and Gonzalez, JM and Hardy, M and Harrison, PW and Hourlier, T and Hollis, Z and Hunt, T and James, B and Jiang, Y and Johnson, R and Kay, M and Lagarde, J and Martin, FJ and Gómez, LM and Nair, S and Ni, P and Pozo, F and Ramalingam, V and Ruffier, M and Schmitt, BM and Schreiber, JM and Steed, E and Suner, MM and Sumathipala, D and Sycheva, I and Uszczynska-Ratajczak, B and Wass, E and Yang, YT and Yates, A and Zafrulla, Z and Choudhary, JS and Gerstein, M and Guigo, R and Hubbard, TJP and Kellis, M and Kundaje, A and Paten, B and Tress, ML and Flicek, P}, title = {GENCODE: reference annotation for the human and mouse genomes in 2023.}, journal = {Nucleic acids research}, volume = {51}, number = {D1}, pages = {D942-D949}, pmid = {36420896}, issn = {1362-4962}, support = {R01 HG004037/HG/NHGRI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Humans ; Animals ; Mice ; Molecular Sequence Annotation ; *Computational Biology/methods ; *Genome, Human/genetics ; Transcriptome/genetics ; Gene Expression Profiling ; Databases, Genetic ; }, abstract = {GENCODE produces high quality gene and transcript annotation for the human and mouse genomes. All GENCODE annotation is supported by experimental data and serves as a reference for genome biology and clinical genomics. 
The GENCODE consortium generates targeted experimental data, develops bioinformatic tools and carries out analyses that, along with externally produced data and methods, support the identification and annotation of transcript structures and the determination of their function. Here, we present an update on the annotation of human and mouse genes, including developments in the tools, data, analyses and major collaborations which underpin this progress. For example, we report the creation of a set of non-canonical ORFs identified in GENCODE transcripts, the LRGASP collaboration to assess the use of long transcriptomic data to build transcript models, the progress in collaborations with RefSeq and UniProt to increase convergence in the annotation of human and mouse protein-coding genes, the propagation of GENCODE across the human pan-genome and the development of new tools to support annotation of regulatory features by GENCODE. Our annotation is accessible via Ensembl, the UCSC Genome Browser and https://www.gencodegenes.org.}, } @article {pmid36420160, year = {2022}, author = {Tripodi, P}, title = {Next generation sequencing technologies to explore the diversity of germplasm resources: Achievements and trends in tomato.}, journal = {Computational and structural biotechnology journal}, volume = {20}, number = {}, pages = {6250-6258}, pmid = {36420160}, issn = {2001-0370}, abstract = {Tomato is one of the major vegetable crops grown worldwide and a model species for genetic and biological research. Progress in genomic technologies made possible the development of forefront methods for high-scale sequencing, providing comprehensive insight into the genetic architecture of germplasm resources. This review revisits next-generation sequencing strategies and applications to investigate the diversity of tomato, describing the common platforms used for SNP genotyping of large collections, de novo sequencing, and whole genome resequencing. 
Significant findings in evolutionary history are outlined, thus discussing how genomics has provided new hints about the processes behind domestication. Finally, achievement and perspectives on pan-genome construction and graphical pan-genome development toward precise mining of the natural variation to be exploited for breeding purposes are presented.}, } @article {pmid36419435, year = {2022}, author = {Wang, Q and Zhang, L and Zhang, Y and Chen, H and Song, J and Lyu, M and Chen, R and Zhang, L}, title = {Comparative genomic analyses reveal genetic characteristics and pathogenic factors of Bacillus pumilus HM-7.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1008648}, pmid = {36419435}, issn = {1664-302X}, abstract = {Bacillus pumilus plays an important role in industrial application and biocontrol activities, as well as causing humans and plants disease, leading to economic losses and biosafety concerns. However, until now, the pathogenesis and underlying mechanisms of B. pumilus strains remain unclear. In our previous study, one representative isolate of B. pumilus named HM-7 has been recovered and proved to be the causal agent of fruit rot on muskmelon (Cucumis melo). Herein, we present a complete and annotated genome sequence of HM-7 that contains 4,111 coding genes in a single 3,951,520 bp chromosome with 41.04% GC content. A total of 3,481 genes were functionally annotated with the GO, COG, and KEGG databases. Pan-core genome analysis of HM-7 and 20 representative B. pumilus strains, as well as six closely related Bacillus species, discovered 740 core genes and 15,205 genes in the pan-genome of 21 B. pumilus strains, in which 485 specific-genes were identified in HM-7 genome. The average nucleotide identity (ANI), and whole-genome-based phylogenetic analysis revealed that HM-7 was most closely related to the C4, GR8, MTCC-B6033, TUAT1 and SH-B11 strains, but evolutionarily distinct from other strains in B. pumilus. 
Collinearity analysis of the six similar B. pumilus strains showed high levels of synteny but also several divergent regions for each strain. In the HM-7 genome, we identified 484 genes in the carbohydrate-active enzymes (CAZyme) class, 650 genes encoding virulence factors, and 1,115 genes associated with pathogen-host interactions. Moreover, three HM-7-specific regions were determined, which contained 424 protein-coding genes. Further investigation of these genes showed that 19 pathogenesis-related genes were mainly associated with flagella formation and secretion of toxic products, which might be involved in the virulence of strain HM-7. Our results provided detailed genomic and taxonomic information for the HM-7 strain, and discovered its potential pathogenic mechanism, which lay a foundation for developing effective prevention and control strategies against this pathogen in the future.}, } @article {pmid36419432, year = {2022}, author = {Kumar, P and Rani, S and Dahiya, P and Kumar, A and Dang, AS and Suneja, P}, title = {Whole genome analysis for plant growth promotion profiling of Pantoea agglomerans CPHN2, a non-rhizobial nodule endophyte.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {998821}, pmid = {36419432}, issn = {1664-302X}, abstract = {Reduced agricultural production as well as issues like nutrient-depleted soils, eutrophication, and groundwater contamination have drawn attention to the use of endophyte-based bioformulations to restore soil fertility. Pantoea agglomerans CPHN2, a non-rhizobial nodule endophyte isolated from Cicer arietinum, exhibited a variety of plant growth-promoting traits. In this study, we used NextSeq500 technology to analyze whole-genome sequence information of this plant growth-promoting endophytic bacteria. The genome of P. agglomerans CPHN2 has a length of 4,839,532 bp and a G + C content of 55.2%. 
The whole genome comprises three different genomic fractions, comprising one circular chromosome and two circular plasmids. A comparative analysis between P. agglomerans CPHN2 and 10 genetically similar strains was performed using a bacterial pan-genome pipeline. All the predicted and annotated gene sequences for plant growth promotions (PGPs), such as phosphate solubilization, siderophore synthesis, nitrogen metabolism, and indole-3-acetic acid (IAA) of P. agglomerans CPHN2, were identified. The whole-genome analysis of P. agglomerans CPHN2 provides an insight into the mechanisms underlying PGP by endophytes and its potential applications as a biofertilizer.}, } @article {pmid36417612, year = {2022}, author = {Brito, LP and Santos, DS and Freitas, NSA and Medeiros, RS and Souza, PRE and Soares, MTCV and Porto, ALF}, title = {In silico evaluation of genomic characteristics of Streptococcus infantarius subsp. infantarius for application in fermentations.}, journal = {Anais da Academia Brasileira de Ciencias}, volume = {94}, number = {suppl 3}, pages = {e20211447}, doi = {10.1590/0001-3765202220211447}, pmid = {36417612}, issn = {1678-2690}, mesh = {Fermentation ; *Streptococcus/genetics ; *Genomics ; Sequence Analysis, DNA ; }, abstract = {This study aims to evaluate the in silico genomic characteristics of Streptococcus infantarius subsp. infantarius, isolated from Coalho cheese from Paraíba, Brazil, with a view to application in lactic fermentations. rRNA sequences from the 16S ribosomal region were used as input to GenBank, in the search for patterns that could reveal a non-pathogenic behavior of S. infantarius subsp. infantarius, comparing mobile genetic elements, antibiotic resistance genes, pan-genome analysis and multi-genome alignment among related species. S. infantarius subsp. 
infantarius CJ18 was the only complete genome reported by BLAST/NCBI with high similarity and after comparative genetics with complete genomes of Streptococcus agalactiae (SAG153, NJ1606) and Streptococcus thermophilus (ST106, CS18, IDCC2201, APC151) revealed that CJ18 showed a low number of transposases and integrases, infection by phage bacteria of the Streptococcus genus, absence of antibiotic resistance genes and presence of bacteriocin, folate and riboflavin producing genes. The genome alignment revealed that the collinear blocks of S. thermophilus ST106 and S. agalactiae SAG153 have inverted blocks when compared to the CJ18 genome due to gene positioning, insertions and deletions. Therefore, the strains of S. infantarius subsp. infantarius isolated from Coalho cheese from Paraíba showed genomic similarity with CJ18 and the mobility of genes analyzed in silico showed absence of pathogenicity throughout the genome of CJ18, indicating the potential of these strains for the dairy industry.}, } @article {pmid36416120, year = {2023}, author = {Yang, L and Yang, Y and Huang, L and Cui, X and Liu, Y}, title = {From single- to multi-omics: future research trends in medicinal plants.}, journal = {Briefings in bioinformatics}, volume = {24}, number = {1}, pages = {}, pmid = {36416120}, issn = {1477-4054}, support = {202102AA310034//Major Science and Technology Special Project of Yunnan Province/ ; 31960134//National Natural Science Foundation of China/ ; KKAN20222025//Yunnan Major Scientific and Technological Projects/ ; }, mesh = {*Plants, Medicinal/genetics/metabolism ; Multiomics ; Genomics ; Proteomics ; Computational Biology ; Metabolomics ; }, abstract = {Medicinal plants are the main source of natural metabolites with specialised pharmacological activities and have been widely examined by plant researchers. 
Numerous omics studies of medicinal plants have been performed to identify molecular markers of species and functional genes controlling key biological traits, as well as to understand biosynthetic pathways of bioactive metabolites and the regulatory mechanisms of environmental responses. Omics technologies have been widely applied to medicinal plants, including taxonomics, transcriptomics, metabolomics, proteomics, genomics, pangenomics, epigenomics and mutagenomics. However, because of the complex biological regulation network, single omics usually fail to explain the specific biological phenomena. In recent years, reports of integrated multi-omics studies of medicinal plants have increased. Until now, there have been few assessments of recent developments and upcoming trends in omics studies of medicinal plants. We highlight recent developments in omics research of medicinal plants, summarise the typical bioinformatics resources available for analysing omics datasets, and discuss related future directions and challenges. This information facilitates further studies of medicinal plants, refinement of current approaches and leads to new ideas.}, } @article {pmid36415217, year = {2022}, author = {Golchha, NC and Nighojkar, A and Nighojkar, S}, title = {Redefining genomic view of Clostridioides difficile through pangenome analysis and identification of drug targets from its core genome.}, journal = {Drug target insights}, volume = {16}, number = {}, pages = {17-24}, pmid = {36415217}, issn = {1177-3928}, abstract = {INTRODUCTION: Clostridioides difficile infection (CDI) is a leading cause of gastrointestinal infections and in the present day is a major concern for global health care system. The unavailability of specific antibiotics for CDI treatment and its emerging cases worldwide further broaden the challenge to control CDI.

METHODS: The availability of a large number of genome sequences for C. difficile and many bioinformatics tools for genome analysis provides the opportunity for in silico pangenomic analysis. In the present study, 97 strains of C. difficile were used for pangenomic studies and characterized for their phylogenomic and functional analysis.

RESULTS: Pangenome analysis reveals open pangenome of C. difficile and high genetic diversity. Sequence and interactome analysis of 1,481 core genes was done and eight potent drug targets are identified. Three drug targets, namely, aminodeoxychorismate synthase (PabB), D-alanyl-D-alanine carboxypeptidase (DD-CPase) and undecaprenyl diphospho-muramoyl pentapeptide beta-N-acetylglucosaminyl transferase (MurG transferase), have been reported as drug targets for other human pathogens, and five targets, namely, bifunctional diguanylate cyclase/phosphodiesterase (cyclic-diGMP), sporulation transcription factor (Spo0A), histidinol-phosphate transaminase (HisC), 3-deoxy-7-phosphoheptulonate synthase (DAHP synthase) and c-di-GMP phosphodiesterase (PdcA), are novel.

CONCLUSION: The suggested potent targets could act as broad-spectrum drug targets for C. difficile. However, further validation needs to be done before using them for lead compound discovery.}, } @article {pmid36412754, year = {2022}, author = {Sánchez-Suárez, J and Díaz, L and Coy-Barrera, E and Villamil, L}, title = {Specialized Metabolism of Gordonia Genus: An Integrated Survey on Chemodiversity Combined with a Comparative Genomics-Based Analysis.}, journal = {Biotech (Basel (Switzerland))}, volume = {11}, number = {4}, pages = {}, pmid = {36412754}, issn = {2673-6284}, support = {80740-168-2019//Ministerio de Ciencia, Tecnología e Innovación (Colombia)/ ; ING-175-2016//Universidad de La Sabana/ ; }, abstract = {Members of the phylum Actinomycetota (formerly Actinobacteria) have historically been the most prolific providers of small bioactive molecules. Although the genus Streptomyces is the best-known member for this issue, other genera, such as Gordonia, have shown interesting potential in their specialized metabolism. Thus, we combined herein the result of a comprehensive literature survey on metabolites derived from Gordonia strains with a comparative genomic analysis to examine the potential of the specialized metabolism of the genus Gordonia. Thirty Gordonia-derived compounds of different classes were gathered (i.e., alkaloids, amides, phenylpropanoids, and terpenoids), exhibiting antimicrobial and cytotoxic activities, and several were also isolated from Streptomyces (e.g., actinomycin, nocardamin, diolmycin A1). With the genome data, we estimated an open pan-genome of 57,901 genes, most of them being part of the cloud genome. Regarding the BGCs content, 531 clusters were found, including Terpenes, RiPP-like, and NRPS clusters as the most frequent clusters. Our findings demonstrated that Gordonia is a poorly studied genus in terms of its specialized metabolism production and potential applications. Nevertheless, given their BGCs content, Gordonia spp. 
are a valuable biological resource that could expand the chemical spectrum of the phylum Actinomycetota, involving novel BGCs for inspiring innovative outlines for synthetic biology and further use in biotechnological initiatives. Therefore, further studies and more efforts should be made to explore different environments and evaluate other bioactivities.}, } @article {pmid36409181, year = {2022}, author = {Mun, T and Vaddadi, NSK and Langmead, B}, title = {Pangenomic Genotyping with the Marker Array.}, journal = {Algorithms in bioinformatics : ... International Workshop, WABI ..., proceedings. WABI (Workshop)}, volume = {242}, number = {}, pages = {}, pmid = {36409181}, support = {R01 HG011392/HG/NHGRI NIH HHS/United States ; R35 GM139602/GM/NIGMS NIH HHS/United States ; }, abstract = {We present a new method and software tool called rowbowt that applies a pangenome index to the problem of inferring genotypes from short-read sequencing data. The method uses a novel indexing structure called the marker array. Using the marker array, we can genotype variants with respect to large panels like the 1000 Genomes Project while avoiding the reference bias that results when aligning to a single linear reference. 
rowbowt can infer accurate genotypes in less time and memory compared to existing graph-based methods.}, } @article {pmid36408900, year = {2023}, author = {Fullam, A and Letunic, I and Schmidt, TSB and Ducarmon, QR and Karcher, N and Khedkar, S and Kuhn, M and Larralde, M and Maistrenko, OM and Malfertheiner, L and Milanese, A and Rodrigues, JFM and Sanchis-López, C and Schudoma, C and Szklarczyk, D and Sunagawa, S and Zeller, G and Huerta-Cepas, J and von Mering, C and Bork, P and Mende, DR}, title = {proGenomes3: approaching one million accurately and consistently annotated high-quality prokaryotic genomes.}, journal = {Nucleic acids research}, volume = {51}, number = {D1}, pages = {D760-D766}, pmid = {36408900}, issn = {1362-4962}, mesh = {Databases, Genetic ; *Genome ; Genomics ; Molecular Sequence Annotation ; *Prokaryotic Cells ; Bacteria/classification/genetics ; }, abstract = {The interpretation of genomic, transcriptomic and other microbial 'omics data is highly dependent on the availability of well-annotated genomes. As the number of publicly available microbial genomes continues to increase exponentially, the need for quality control and consistent annotation is becoming critical. We present proGenomes3, a database of 907 388 high-quality genomes containing 4 billion genes that passed stringent criteria and have been consistently annotated using multiple functional and taxonomic databases including mobile genetic elements and biosynthetic gene clusters. proGenomes3 encompasses 41 171 species-level clusters, defined based on universal single copy marker genes, for which pan-genomes and contextual habitat annotations are provided. 
The database is available at http://progenomes.embl.de/.}, } @article {pmid36408592, year = {2022}, author = {Vij, S and Thakur, R and Rishi, P}, title = {Reverse engineering approach: a step towards a new era of vaccinology with special reference to Salmonella.}, journal = {Expert review of vaccines}, volume = {21}, number = {12}, pages = {1763-1785}, doi = {10.1080/14760584.2022.2148661}, pmid = {36408592}, issn = {1744-8395}, mesh = {Humans ; Vaccinology ; *Typhoid Fever/prevention & control ; Salmonella/genetics ; *Typhoid-Paratyphoid Vaccines ; Anti-Bacterial Agents ; Epitopes ; }, abstract = {INTRODUCTION: Salmonella is responsible for causing enteric fever, septicemia, and gastroenteritis in humans. Due to high disease burden and emergence of multi- and extensively drug-resistant Salmonella strains, it is becoming difficult to treat the infection with existing battery of antibiotics as we are not able to discover newer antibiotics at the same pace at which the pathogens are acquiring resistance. Though vaccines against Salmonella are available commercially, they have limited efficacy. Advancements in genome sequencing technologies and immunoinformatics approaches have solved the problem significantly by giving rise to a new era of vaccine designing, i.e. 'Reverse engineering.' Reverse engineering/vaccinology has expedited the vaccine identification process. Using this approach, multiple potential proteins/epitopes can be identified and constructed as a single entity to tackle enteric fever.

AREAS COVERED: This review provides details of reverse engineering approach and discusses various protein and epitope-based vaccine candidates identified using this approach against typhoidal Salmonella.

EXPERT OPINION: Reverse engineering approach holds great promise for developing strategies to tackle the pathogen(s) by overcoming the limitations posed by existing vaccines. Progressive advancements in the arena of reverse vaccinology, structural biology, and systems biology combined with an improved understanding of host-pathogen interactions are essential components to design new-generation vaccines.}, } @article {pmid36405966, year = {2022}, author = {Guo, Y and Zeng, C and Ma, C and Cai, H and Jiang, X and Zhai, S and Xu, X and Lin, M}, title = {Comparative genomics analysis of the multidrug-resistant Aeromonas hydrophila MX16A providing insights into antibiotic resistance genes.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {1042350}, pmid = {36405966}, issn = {2235-2988}, mesh = {*Aeromonas hydrophila/genetics ; *Anti-Bacterial Agents/pharmacology ; Drug Resistance, Microbial ; beta-Lactams ; Genomics ; }, abstract = {In this paper, the whole genome of the multidrug-resistant Aeromonas hydrophila MX16A was comprehensively analyzed and compared after sequencing by PacBio RS II. To shed light on the drug resistance mechanism of A. hydrophila MX16A, a Kirby-Bauer disk diffusion method was used to assess the phenotypic drug susceptibility. Importantly, resistance against β-lactam, sulfonamides, rifamycins, macrolides, tetracyclines and chloramphenicols was largely consistent with the prediction analysis results of drug resistance genes in the CARD database. The varied types of resistance genes identified from A. hydrophila MX16A revealed multiple resistance mechanisms, including enzyme inactivation, gene mutation and active effusion. The publicly available complete genomes of 35 Aeromonas hydrophila strains on NCBI, including MX16A, were downloaded for genomic comparison and analysis. 
The analysis of 33 genomes with ANI greater than 95% showed that the pan-genome consisted of 9556 genes, and the core genes converged to 3485 genes. In summary, the obtained results showed that A. hydrophila exhibited a great genomic diversity as well as diverse metabolic function and it is believed that frequent exchanges between strains lead to the horizontal transfer of drug resistance genes.}, } @article {pmid36404338, year = {2022}, author = {Orata, FD and Hussain, NAS and Liang, KYH and Hu, D and Boucher, YF}, title = {Genomes of Vibrio metoecus co-isolated with Vibrio cholerae extend our understanding of differences between these closely related species.}, journal = {Gut pathogens}, volume = {14}, number = {1}, pages = {42}, pmid = {36404338}, issn = {1757-4749}, abstract = {BACKGROUND: Vibrio cholerae, the causative agent of cholera, is a well-studied species, whereas Vibrio metoecus is a recently described close relative that is also associated with human infections. The availability of V. metoecus genomes provides further insight into its genetic differences from V. cholerae. Additionally, both species have been co-isolated from a cholera-free brackish coastal pond and have been suggested to interact with each other by horizontal gene transfer (HGT).

RESULTS: The genomes of 17 strains from each species were sequenced. All strains share a large core genome (2675 gene families) and very few genes are unique to each species (< 3% of the pan-genome of both species). This led to the identification of potential molecular markers—for nitrite reduction, as well as peptidase and rhodanese activities—to further distinguish V. metoecus from V. cholerae. Interspecies HGT events were inferred in 21% of the core genes and 45% of the accessory genes. A directional bias in gene transfer events was found in the core genome, where V. metoecus was a recipient of three times (75%) more genes from V. cholerae than it was a donor (25%).

CONCLUSION: V. metoecus was misclassified as an atypical variant of V. cholerae due to their resemblance in a majority of biochemical characteristics. More distinguishing phenotypic assays can be developed based on the discovery of potential gene markers to avoid any future misclassifications. Furthermore, differences in relative abundance or seasonality were observed between the species and could contribute to the bias in directionality of HGT.}, } @article {pmid36395320, year = {2022}, author = {Lofgren, LA and Ross, BS and Cramer, RA and Stajich, JE}, title = {The pan-genome of Aspergillus fumigatus provides a high-resolution view of its population structure revealing high levels of lineage-specific diversity driven by recombination.}, journal = {PLoS biology}, volume = {20}, number = {11}, pages = {e3001890}, pmid = {36395320}, issn = {1545-7885}, support = {R01 AI130128/AI/NIAID NIH HHS/United States ; S10 OD016290/OD/NIH HHS/United States ; T32 HL134598/HL/NHLBI NIH HHS/United States ; }, mesh = {*Antifungal Agents ; *Aspergillus fumigatus/genetics ; Drug Resistance, Fungal ; Genomics ; Recombination, Genetic/genetics ; }, abstract = {Aspergillus fumigatus is a deadly agent of human fungal disease where virulence heterogeneity is thought to be at least partially structured by genetic variation between strains. While population genomic analyses based on reference genome alignments offer valuable insights into how gene variants are distributed across populations, these approaches fail to capture intraspecific variation in genes absent from the reference genome. Pan-genomic analyses based on de novo assemblies offer a promising alternative to reference-based genomics with the potential to address the full genetic repertoire of a species. Here, we evaluate 260 genome sequences of A. fumigatus including 62 newly sequenced strains, using a combination of population genomics, phylogenomics, and pan-genomics. 
Our results offer a high-resolution assessment of population structure and recombination frequency, phylogenetically structured gene presence-absence variation, evidence for metabolic specificity, and the distribution of putative antifungal resistance genes. Although A. fumigatus disperses primarily via asexual conidia, we identified extraordinarily high levels of recombination with the lowest linkage disequilibrium decay value reported for any fungal species to date. We provide evidence for 3 primary populations of A. fumigatus, with recombination occurring only rarely between populations and often within them. These 3 populations are structured by both gene variation and distinct patterns of gene presence-absence with unique suites of accessory genes present exclusively in each clade. Accessory genes displayed functional enrichment for nitrogen and carbohydrate metabolism suggesting that populations may be stratified by environmental niche specialization. Similarly, the distribution of antifungal resistance genes and resistance alleles were often structured by phylogeny. Altogether, the pan-genome of A. fumigatus represents one of the largest fungal pan-genomes reported to date including many genes unrepresented in the Af293 reference genome. These results highlight the inadequacy of relying on a single-reference genome-based approach for evaluating intraspecific variation and the power of combined genomic approaches to elucidate population structure, genetic diversity, and putative ecological drivers of clinically relevant fungi.}, } @article {pmid36386637, year = {2022}, author = {Jiang, ZM and Deng, Y and Han, XF and Su, J and Wang, H and Yu, LY and Zhang, YQ}, title = {Geminicoccus flavidas sp. nov. and Geminicoccus harenae sp. 
nov., two IAA-producing novel rare bacterial species inhabiting desert biological soil crusts.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1034816}, pmid = {36386637}, issn = {1664-302X}, abstract = {Two Gram-staining negative strains (CPCC 101082[T] and CPCC 101083[T]) were isolated from biological sandy soil crusts samples collected from Badain Jaran desert, China. Both isolates were heterotrophic phototroph, could produce indole-3-acetic acid. The 16S rRNA gene sequences of these two strains were closely related to the members of the family Geminicoccaceae, showing high similarities with Geminicoccus roseus DSM 18922[T] (96.9%) and Arboricoccus pini B29T1[T] (90.1%), respectively. In phylogenetic tree based on 16S rRNA gene sequences, strain CPCC 101082[T] and CPCC 101083[T] formed a robust distinct clade with Geminicoccus roseus DSM 18922[T] within the family Geminicoccaceae, which indicated that these two isolates could be classified into the genus Geminicoccus. The growth of strain CPCC 101082[T] occurred at 15-42°C and pH 4.0-10.0 (optima at 28-37°C and pH 6.0-8.0). The growth of strain CPCC 101083[T] occurred at 4-45°C and pH 4.0-10.0 (optima at 25-30°C and pH 6.0-8.0). The major cellular fatty acids of CPCC 101082[T] and CPCC 101083[T] contained C18:1 ω7c/C18:1 ω6c, cyclo-C19:0 ω8c, and C16:0. Q-10 was detected as the sole respiratory quinone. Diphosphatidylglycerol, phosphatidylglycerol, phosphatidylcholine, phosphatidylethanolamine, an unidentified phospholipid and an unidentified aminolipid were tested in the polar lipids profile. The genomes of the two isolates were characterized as about 5.9 Mbp in size with the G + C content of nearly 68%. The IAA-producing encoding genes were predicted in both genomes. The values of average nucleotide identity were 80.6, 81.2 and 92.4% based on a pairwise comparison of the genomes of strains CPCC 101082[T] and CPCC 101083[T] and Geminicoccus roseus DSM 18922[T], respectively. 
On the basis of the genotypic, chemotaxonomic and phenotypic characteristics, the strains CPCC 101082[T] (=NBRC 113513[T] = KCTC 62853[T]) and CPCC 101083[T] (=NBRC 113514[T] = KCTC 62854[T]) are proposed to represent two novel species of the genus Geminicoccus with the names Geminicoccus flavidas sp. nov. and Geminicoccus harenae sp. nov.}, } @article {pmid36377929, year = {2023}, author = {Daware, A and Malik, A and Srivastava, R and Das, D and Ellur, RK and Singh, AK and Tyagi, AK and Parida, SK}, title = {Rice Pangenome Genotyping Array: an efficient genotyping solution for pangenome-based accelerated genetic improvement in rice.}, journal = {The Plant journal : for cell and molecular biology}, volume = {113}, number = {1}, pages = {26-46}, doi = {10.1111/tpj.16028}, pmid = {36377929}, issn = {1365-313X}, mesh = {Chromosome Mapping ; *Genome-Wide Association Study ; *Oryza/genetics ; Genotype ; Quantitative Trait Loci/genetics ; Polymorphism, Single Nucleotide/genetics ; }, abstract = {The advent of the pangenome era has unraveled previously unknown genetic variation existing within diverse crop plants, including rice. This untapped genetic variation is believed to account for a major portion of phenotypic variation existing in crop plants. However, the use of conventional single reference-guided genotyping often fails to capture a large portion of this genetic variation leading to a reference bias. This makes it difficult to identify and utilize novel population/cultivar-specific genes for crop improvement. Thus, we developed a Rice Pangenome Genotyping Array (RPGA) harboring probes assaying 80K single-nucleotide polymorphisms (SNPs) and presence-absence variants spanning the entire 3K rice pangenome. This array provides a simple, user-friendly and cost-effective (60-80 USD per sample) solution for rapid pangenome-based genotyping in rice. 
The genome-wide association study (GWAS) conducted using RPGA-SNP genotyping data of a rice diversity panel detected a total of 42 loci, including previously known as well as novel genomic loci regulating grain size/weight traits in rice. Eight of these identified trait-associated loci (dispensable loci) could not be detected with conventional single reference genome-based GWAS. A WD repeat-containing PROTEIN 12 gene underlying one of such dispensable locus on chromosome 7 (qLWR7) along with other non-dispensable loci were subsequently detected using high-resolution quantitative trait loci mapping confirming authenticity of RPGA-led GWAS. This demonstrates the potential of RPGA-based genotyping to overcome reference bias. The application of RPGA-based genotyping for population structure analysis, hybridity testing, ultra-high-density genetic map construction and chromosome-level genome assembly, and marker-assisted selection was also demonstrated. A web application (http://www.rpgaweb.com) was further developed to provide an easy to use platform for the imputation of RPGA-based genotyping data using 3K rice reference panel and subsequent GWAS.}, } @article {pmid36377253, year = {2023}, author = {Tello, D and Gonzalez-Garcia, LN and Gomez, J and Zuluaga-Monares, JC and Garcia, R and Angel, R and Mahecha, D and Duarte, E and Leon, MDR and Reyes, F and Escobar-Velásquez, C and Linares-Vásquez, M and Cardozo, N and Duitama, J}, title = {NGSEP 4: Efficient and accurate identification of orthogroups and whole-genome alignment.}, journal = {Molecular ecology resources}, volume = {23}, number = {3}, pages = {712-724}, doi = {10.1111/1755-0998.13737}, pmid = {36377253}, issn = {1755-0998}, support = {80740-441-2020//Ministerio de Ciencia Tecnología e Innovación de Colombia/ ; //Universidad de los Andes/ ; }, mesh = {*Software ; *Genome ; Genomics/methods ; Algorithms ; Metagenomics ; }, abstract = {Whole-genome alignment allows researchers to understand the genomic 
structure and variation among genomes. Approaches based on direct pairwise comparisons of DNA sequences require large computational capacities. As a consequence, pipelines combining tools for orthologous gene identification and synteny have been developed. In this manuscript, we present the latest functionalities implemented in NGSEP 4, to identify orthogroups and perform whole genome alignments. NGSEP implements functionalities for identification of clusters of homologous genes, synteny analysis and whole genome alignment. Our results showed that the NGSEP algorithm for orthogroups identification has competitive accuracy and efficiency in comparison to commonly used tools. The implementation also includes a visualization of the whole genome alignment based on synteny of the orthogroups that were identified, and a reconstruction of the pangenome based on frequencies of the orthogroups among the genomes. NGSEP 4 also includes a new graphical user interface based on the JavaFX technology. We expect that these new developments will be very useful for several studies in evolutionary biology and population genomics.}, } @article {pmid36376589, year = {2023}, author = {Chivian, D and Jungbluth, SP and Dehal, PS and Wood-Charlson, EM and Canon, RS and Allen, BH and Clark, MM and Gu, T and Land, ML and Price, GA and Riehl, WJ and Sneddon, MW and Sutormin, R and Zhang, Q and Cottingham, RW and Henry, CS and Arkin, AP}, title = {Metagenome-assembled genome extraction and analysis from microbiomes using KBase.}, journal = {Nature protocols}, volume = {18}, number = {1}, pages = {208-238}, pmid = {36376589}, issn = {1750-2799}, mesh = {*Metagenome ; Phylogeny ; Genome, Bacterial ; *Microbiota/genetics ; Bacteria/genetics ; Metagenomics ; }, abstract = {Uncultivated Bacteria and Archaea account for the vast majority of species on Earth, but obtaining their genomes directly from the environment, using shotgun sequencing, has only become possible recently. 
To realize the hope of capturing Earth's microbial genetic complement and to facilitate the investigation of the functional roles of specific lineages in a given ecosystem, technologies that accelerate the recovery of high-quality genomes are necessary. We present a series of analysis steps and data products for the extraction of high-quality metagenome-assembled genomes (MAGs) from microbiomes using the U.S. Department of Energy Systems Biology Knowledgebase (KBase) platform (http://www.kbase.us/). Overall, these steps take about a day to obtain extracted genomes when starting from smaller environmental shotgun read libraries, or up to about a week from larger libraries. In KBase, the process is end-to-end, allowing a user to go from the initial sequencing reads all the way through to MAGs, which can then be analyzed with other KBase capabilities such as phylogenetic placement, functional assignment, metabolic modeling, pangenome functional profiling, RNA-Seq and others. While portions of such capabilities are available individually from other resources, the combination of the intuitive usability, data interoperability and integration of tools in a freely available computational resource makes KBase a powerful platform for obtaining MAGs from microbiomes. 
While this workflow offers tools for each of the key steps in the genome extraction process, it also provides a scaffold that can be easily extended with additional MAG recovery and analysis tools, via the KBase software development kit (SDK).}, } @article {pmid36375718, year = {2023}, author = {Santos, RGD and Hurtado, R and Rodrigues, DLN and Lima, A and Dos Anjos, WF and Rifici, C and Attili, AR and Tiwari, S and Jaiswal, AK and Spier, SJ and Mazzullo, G and Morais-Rodrigues, F and Gomide, ACP and de Jesus, LCL and Aburjaile, FF and Brenig, B and Cuteri, V and Castro, TLP and Seyffert, N and Santos, A and Góes-Neto, A and de Jesus Sousa, T and Azevedo, V}, title = {Comparative genomic analysis of the Dietzia genus: an insight into genomic diversity, and adaptation.}, journal = {Research in microbiology}, volume = {174}, number = {3}, pages = {103998}, doi = {10.1016/j.resmic.2022.103998}, pmid = {36375718}, issn = {1769-7123}, mesh = {Sequence Analysis, DNA ; Phylogeny ; *Genomics ; Genome, Bacterial/genetics ; Base Sequence ; *Actinomycetales/genetics ; }, abstract = {Dietzia strains are widely distributed in the environment, presenting an opportunistic role, and some species have undetermined taxonomic characteristics. Here, we propose the existence of errors in the classification of species in this genus using comparative genomics. We performed ANI, dDDH, pangenome and genomic plasticity analyses better to elucidate the phylogenomic relationships between Dietzia strains. For this, we used 55 genomes of Dietzia downloaded from public databases that were combined with a newly sequenced. Sequence analysis of a phylogenetic tree based on genome similarity comparisons and dDDH, ANI analyses supported grouping different Dietzia species into four distinct groups. 
The pangenome analysis corroborated the classification of these groups, supporting the idea that some species of Dietzia could be reassigned in a possible classification into three distinct species, each containing less variability than that found within the global pangenome of all strains. Additionally, analysis of genomic plasticity based on groups containing Dietzia strains found differences in the presence and absence of symbiotic Islands and pathogenic islands related to their isolation site. We propose that the comparison of pangenome subsets together with phylogenomic approaches can be used as an alternative for the classification and differentiation of new species of the genus Dietzia.}, } @article {pmid36375370, year = {2022}, author = {Islam, J and Sarkar, H and Hoque, H and Hasan, MN and Jewel, GMNA}, title = {In-silico approach of identifying novel therapeutic targets against Yersinia pestis using pan and subtractive genomic analysis.}, journal = {Computational biology and chemistry}, volume = {101}, number = {}, pages = {107784}, doi = {10.1016/j.compbiolchem.2022.107784}, pmid = {36375370}, issn = {1476-928X}, mesh = {Humans ; *Yersinia pestis/genetics ; *Plague/drug therapy/genetics/microbiology ; Genomics ; Genome, Bacterial ; Virulence Factors ; }, abstract = {The magnitude of human affliction brought about by bacterial infections has been on the rise since the mid-5th century. Yersinia pestis is one such notable, gram-negative bacterium that inflicted havoc around the globe three times throughout different millenniums by causing deadly plagues. Despite the unremitting efforts by scientists, different strains of Yersinia pestis are still affecting the populations in various parts of the world by growing resistant to existing antimicrobial agents owing to their overuse. The current scenario, therefore, calls for new therapeutics to further combat the disease. 
In this study, 3105 core, 387 pathogen-specific unique, 536 choke-point, 796 virulence factors, and 115 antimicrobial resistant proteins were found using a pan-genomic and subtractive genome analysis of nine Yersinia pestis strains that could be instrumental in the development of drugs against Yersinia pestis. Subsequently, 1461 and 1114 essential proteins were identified as non-homologous to human and gut microflora. 535 and 30 proteins were predicted as cytoplasmic and broad-spectrum targets respectively. Finally, four potential targets were selected for their high connectivity in protein-protein interaction network. These selected target proteins are associated with one of the major lipopolysaccharide biosynthesis pathways. Therefore, dismantling their activity might indicate a probable strategy for developing therapeutics to combat bacterial infection caused by Yersinia pestis. However, further experimental validation in the laboratory is needed to consolidate the research findings.}, } @article {pmid36367506, year = {2022}, author = {Qu, L and Li, Y and Wang, W and Shao, Z and Gao, Z and Lai, Q}, title = {Aestuarium zhoushanense is a later heterotypic synonym of Marivivens donghaensis, and transfer of Paradonghicola geojensis to the genus Marivivens as Marivivens geojensis comb. nov.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {72}, number = {11}, pages = {}, doi = {10.1099/ijsem.0.005564}, pmid = {36367506}, issn = {1466-5034}, mesh = {RNA, Ribosomal, 16S/genetics ; Phylogeny ; DNA, Bacterial/genetics ; Bacterial Typing Techniques ; Base Composition ; Sequence Analysis, DNA ; *Fatty Acids/chemistry ; Nucleic Acid Hybridization ; }, abstract = {The 16S rRNA genes of Aestuarium zhoushanense G7[T] and Paradonghicola geojensis FJ12[T] shared 100 % sequence identity with Marivivens donghaensis AM-4[T]. 
Phylogeny of 16S rRNA gene sequences showed that the three type strains formed a monophyletic clade within the genus Marivivens. Whole genome sequence comparisons showed that three type strains shared 46.7-69.7 % digital DNA-DNA hybridization, 92.1-96.4 % average nucleotide identity and 96.2-98.1 % average amino acid identity. The high 16S rRNA gene similarity values show that three type strains should belong to the same genus. The pan-genome of the five strains contained 5754 genes including 1877 core genes. Based on the principle of priority, we propose that A. zhoushanense Yu et al. 2019 is a later heterotypic synonym of M. donghaensis Park et al. 2016, and P. geojensis should be reclassified as Marivivens geojensis comb. nov., respectively.}, } @article {pmid36366394, year = {2022}, author = {Mushtaq, M and Khan, S and Hassan, M and Al-Harbi, AI and Hameed, AR and Khan, K and Ismail, S and Irfan, M and Ahmad, S}, title = {Computational Design of a Chimeric Vaccine against Plesiomonas shigelloides Using Pan-Genome and Reverse Vaccinology.}, journal = {Vaccines}, volume = {10}, number = {11}, pages = {}, pmid = {36366394}, issn = {2076-393X}, abstract = {The swift emergence of antibiotic resistance (AR) in bacterial pathogens to make themselves adaptable to changing environments has become an alarming health issue. To prevent AR infection, many ways can be accomplished such as by decreasing the misuse of antibiotics in human and animal medicine. Among these AR bacterial species, Plesiomonas shigelloides is one of the etiological agents of intestinal infection in humans. It is a gram-negative rod-shaped bacterium that is highly resistant to several classes of antibiotics, and no licensed vaccine against the aforementioned pathogen is available. Hence, substantial efforts are required to screen protective antigens from the pathogen whole genome that can be subjected easily to experimental evaluations. 
Here, we employed a reverse vaccinology (RV) approach to design a multi-antigenic epitopes based vaccine against P. shigelloides. The complete genomes of P. shigelloides were retrieved from the National Center for Biotechnological Information (NCBI) that on average consist of 5226 proteins. The complete proteomes were subjected to different subtractive proteomics filters, and in the results of that analysis, out of total proteins, 2399 were revealed as non-redundant and 2827 as redundant proteins. The non-redundant proteins were further checked for subcellular localization analysis, in which three were localized in the extracellular matrix, eight were outer membrane, and 13 were found in the periplasmic membrane. All surface localized proteins were found to be virulent. Out of a total of 24 virulent proteins, three proteins (flagellar hook protein (FlgE), hypothetical protein, and TonB-dependent hemoglobin/transferrin/lactoferrin family receptor protein) were considered as potential vaccine targets and subjected to epitopes prediction. The predicted epitopes were further examined for antigenicity, toxicity, and solubility. A total of 10 epitopes were selected (GFKESRAEF, VQVPTEAGQ, KINENGVVV, ENKALSQET, QGYASANDE, RLNPTDSRW, TLDYRLNPT, RVTKKQSDK, GEREGKNRP, RDKKTNQPL). The selected epitopes were linked with each other via specific GPGPG linkers in order to design a multi-epitopes vaccine construct, and linked with cholera toxin B subunit adjuvant to make the designed vaccine construct more efficient in terms of antigenicity. The 3D structure of the vaccine construct was modeled ab initio as no appropriate template was available. Furthermore, molecular docking was carried out to check the interaction affinity of the designed vaccine with major histocompatibility complex (MHC)-I (PDB ID: 1L1Y), MHC-II (1KG0), and toll-like receptor 4 (TLR-4) (PDB: 4G8A). Molecular dynamic simulation was applied to evaluate the dynamic behavior of vaccine-receptor complexes. 
Lastly, the binding free energies of the vaccine with receptors were estimated by using MMPB/GBSA methods. All of the aforementioned analyses concluded that the designed vaccine molecule is a good candidate to be used in experimental studies to disclose its immune protective efficacy in animal models.}, } @article {pmid36363712, year = {2022}, author = {Murr, L and Huber, I and Pavlovic, M and Guertler, P and Messelhaeusser, U and Weiss, M and Ehrmann, M and Tuschak, C and Bauer, H and Wenning, M and Busch, U and Bretschneider, N}, title = {Whole-Genome Sequence Comparisons of Listeria monocytogenes Isolated from Meat and Fish Reveal High Inter- and Intra-Sample Diversity.}, journal = {Microorganisms}, volume = {10}, number = {11}, pages = {}, pmid = {36363712}, issn = {2076-2607}, support = {72577//Bavarian State Ministry for Environment and Consumer Protection (StMUV)/ ; }, abstract = {Interpretation of whole-genome sequencing (WGS) data for foodborne outbreak investigations is complex, as the genetic diversity within processing plants and transmission events need to be considered. In this study, we analyzed 92 food-associated Listeria monocytogenes isolates by WGS-based methods. We aimed to examine the genetic diversity within meat and fish production chains and to assess the applicability of suggested thresholds for clustering of potentially related isolates. Therefore, meat-associated isolates originating from the same samples or processing plants as well as fish-associated isolates were analyzed as distinct sets. In silico serogrouping, multilocus sequence typing (MLST), core genome MLST (cgMLST), and pangenome analysis were combined with screenings for prophages and genetic traits. Isolates of the same subtypes (cgMLST types (CTs) or MLST sequence types (STs)) were additionally compared by SNP calling. This revealed the occurrence of more than one CT within all three investigated plants and within two samples. 
Analysis of the fish set resulted in predominant assignment of isolates from pangasius catfish and salmon to ST2 and ST121, respectively, potentially indicating persistence within the respective production chains. The approach not only allowed the detection of distinct subtypes but also the determination of differences between closely related isolates, which need to be considered when interpreting WGS data for surveillance.}, } @article {pmid36362240, year = {2022}, author = {Khoder, M and Osman, M and Kassem, II and Rafei, R and Shahin, A and Fournier, PE and Rolain, JM and Hamze, M}, title = {Whole Genome Analyses Accurately Identify Neisseria spp. and Limit Taxonomic Ambiguity.}, journal = {International journal of molecular sciences}, volume = {23}, number = {21}, pages = {}, pmid = {36362240}, issn = {1422-0067}, support = {N/A//Azm & Saade Association/ ; N/A//Erasmus Mundus/ ; N/A//Cornell Atkinson Postdoctoral Fellowship/ ; }, mesh = {Male ; Humans ; Phylogeny ; *Neisseria/genetics ; Neisseria gonorrhoeae/genetics ; *Neisseria meningitidis/genetics ; Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization ; DNA ; Genome, Bacterial ; }, abstract = {Genome sequencing facilitates the study of bacterial taxonomy and allows the re-evaluation of the taxonomic relationships between species. Here, we aimed to analyze the draft genomes of four commensal Neisseria clinical isolates from the semen of infertile Lebanese men. To determine the phylogenetic relationships among these strains and other Neisseria spp. and to confirm their identity at the genomic level, we compared the genomes of these four isolates with the complete genome sequences of Neisseria gonorrhoeae and Neisseria meningitidis and the draft genomes of Neisseria flavescens, Neisseria perflava, Neisseria mucosa, and Neisseria macacae that are available in the NCBI Genbank database. 
Our findings revealed that the WGS analysis accurately identified and corroborated the matrix-assisted laser desorption ionization-time of flight (MALDI-TOF) species identities of the Neisseria isolates. The combination of three well-established genome-based taxonomic tools (in silico DNA-DNA Hybridization, Ortho Average Nucleotide identity, and pangenomic studies) proved to be relatively the best identification approach. Notably, we also discovered that some Neisseria strains that are deposited in databases contain many taxonomical errors. The latter is very important and must be addressed to prevent misdiagnosis and missing emerging etiologies. We also highlight the need for robust cut-offs to delineate the species using genomic tools.}, } @article {pmid36362207, year = {2022}, author = {Hameed, A and Poznanski, P and Nadolska-Orczyk, A and Orczyk, W}, title = {Graph Pangenomes Track Genetic Variants for Crop Improvement.}, journal = {International journal of molecular sciences}, volume = {23}, number = {21}, pages = {}, pmid = {36362207}, issn = {1422-0067}, support = {2019/35/B/NZ9/00323//National Science Center/ ; }, mesh = {Humans ; *Genome-Wide Association Study ; *Quantitative Trait Loci ; Polymorphism, Single Nucleotide ; Plant Breeding ; Multifactorial Inheritance ; Crops, Agricultural/genetics ; }, abstract = {Global climate change and the urgency to transform crops require an exhaustive genetic evaluation. The large polyploid genomes of food crops, such as cereals, make it difficult to identify candidate genes with confirmed hereditary. Although genome-wide association studies (GWAS) have been proficient in identifying genetic variants that are associated with complex traits, the resolution of acquired heritability faces several significant bottlenecks such as incomplete detection of structural variants (SV), genetic heterogeneity, and/or locus heterogeneity. Consequently, a biased estimate is generated with respect to agronomically complex traits. 
The graph pangenomes have resolved this missing heritability and provide significant details in terms of specific loci segregating among individuals and evolving to variations. The graph pangenome approach facilitates crop improvements through genome-linked fast breeding.}, } @article {pmid36358771, year = {2022}, author = {Cinque, A and Minnei, R and Floris, M and Trevisani, F}, title = {The Clinical and Molecular Features in the VHL Renal Cancers; Close or Distant Relatives with Sporadic Clear Cell Renal Cell Carcinoma?.}, journal = {Cancers}, volume = {14}, number = {21}, pages = {}, pmid = {36358771}, issn = {2072-6694}, abstract = {Von Hippel-Lindau (VHL) disease is an autosomal dominant inherited cancer syndrome caused by germline mutations in the VHL tumor suppressor gene, characterized by the susceptibility to a wide array of benign and malign neoplasms, including clear-cell renal cell carcinoma. Moreover, VHL somatic inactivation is a crucial molecular event also in sporadic ccRCCs tumorigenesis. While systemic biomarkers in the VHL syndrome do not currently play a role in clinical practice, a new promising class of predictive biomarkers, microRNAs, has been increasingly studied. Lots of pan-genomic studies have deeply investigated the possible biological role of microRNAs in the development and progression of sporadic ccRCC; however, few studies have investigated the miRNA profile in VHL patients. 
Our review summarize all the new insights related to clinical and molecular features in VHL renal cancers, with a particular focus on the overlap with sporadic ccRCC.}, } @article {pmid36358219, year = {2022}, author = {Moglad, E and Alanazi, N and Altayb, HN}, title = {Genomic Study of Chromosomally and Plasmid-Mediated Multidrug Resistance and Virulence Determinants in Klebsiella Pneumoniae Isolates Obtained from a Tertiary Hospital in Al-Kharj, KSA.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {11}, number = {11}, pages = {}, pmid = {36358219}, issn = {2079-6382}, support = {IF-PSAU-2021/03/17707//Prince Sattam Bin Abdulaziz University/ ; }, abstract = {Klebsiella pneumoniae is an emergent pathogen causing respiratory tract, bloodstream, and urinary tract infections in humans. This study defines the genomic sequence data, genotypic and phenotypic characterization of K. pneumoniae clinically isolated from Al-Kharj, KSA. Whole-genome analysis of four K. pneumoniae strains was performed, including de novo assembly, functional annotation, whole-genome-phylogenetic analysis, antibiotic-resistant gene identification, prophage regions, virulent factor, and pan-genome analysis. The results showed that K6 and K7 strains were MDR and ESBL producers, K16 was an ESBL producer, and K8 was sensitive to all tested drugs except ampicillin. K6 and K7 were identified with sequence type (ST) 23, while K16 and K8 were identified with STs 353 and 592, respectively. K6 and K7 were identified with the K1 (wzi1 genotype) capsule and O1 serotype, while K8 was identified with the K57 (wzi206 genotype) capsule and O3b. K6 isolates harbored 10 antimicrobial resistance genes (ARGs) associated with four different plasmids; the chloramphenicol acetyltransferase (catB3), blaOXA-1 and aac(6')-Ib-cr genes were detected in plasmid pB-8922_OXA-48. 
K6 and K7 also carried a similar gene cassette in plasmid pC1K6P0122-2; the gene cassettes were the trimethoprim-resistant gene (dfrA14), integron integrase (IntI1), insertion sequence (IS1), transposase protein, and replication initiation protein (RepE). Two hypervirulent plasmids were reported in isolates K6 and K7 that carried synthesis genes (iucA, iucB, iucC, iucD, and iutA) and iron siderophore genes (iroB, iroC, iroD, and iroN). The presence of these plasmids in high-risk clones suggests their dissemination in our region, which represents a serious health problem.}, } @article {pmid36353749, year = {2022}, author = {Oren, E and Dafna, A and Tzuri, G and Halperin, I and Isaacson, T and Elkabetz, M and Meir, A and Saar, U and Ohali, S and La, T and Romay, C and Tadmor, Y and Schaffer, AA and Buckler, ES and Cohen, R and Burger, J and Gur, A}, title = {Pan-genome and multi-parental framework for high-resolution trait dissection in melon (Cucumis melo).}, journal = {The Plant journal : for cell and molecular biology}, volume = {112}, number = {6}, pages = {1525-1542}, pmid = {36353749}, issn = {1365-313X}, mesh = {*Cucumis melo/genetics ; *Cucurbitaceae/genetics ; Plant Breeding ; Chromosome Mapping ; Phenotype ; }, abstract = {Linking genotype with phenotype is a fundamental goal in biology and requires robust data for both. Recent advances in plant-genome sequencing have expedited comparisons among multiple-related individuals. The abundance of structural genomic within-species variation that has been discovered indicates that a single reference genome cannot represent the complete sequence diversity of a species, leading to the expansion of the pan-genome concept. For high-resolution forward genetics, this unprecedented access to genomic variation should be paralleled and integrated with phenotypic characterization of genetic diversity. 
We developed a multi-parental framework for trait dissection in melon (Cucumis melo), leveraging a novel pan-genome constructed for this highly variable cucurbit crop. A core subset of 25 diverse founders (MelonCore25), consisting of 24 accessions from the two widely cultivated subspecies of C. melo, encompassing 12 horticultural groups, and 1 feral accession was sequenced using a combination of short- and long-read technologies, and their genomes were assembled de novo. The construction of this melon pan-genome exposed substantial variation in genome size and structure, including detection of ~300 000 structural variants and ~9 million SNPs. A half-diallel derived set of 300 F2 populations, representing all possible MelonCore25 parental combinations, was constructed as a framework for trait dissection through integration with the pan-genome. We demonstrate the potential of this unified framework for genetic analysis of various melon traits, including rind color intensity and pattern, fruit sugar content, and resistance to fungal diseases. 
We anticipate that utilization of this integrated resource will enhance genetic dissection of important traits and accelerate melon breeding.}, } @article {pmid36350178, year = {2022}, author = {Dong, X and Zhu, M and Li, Y and Huang, D and Wang, L and Yan, C and Zhang, L and Dong, F and Lu, J and Lin, X and Li, K and Bao, Q and Cong, C and Pan, W}, title = {Whole-Genome Sequencing-Based Species Classification, Multilocus Sequence Typing, and Antimicrobial Resistance Mechanism Analysis of the Enterobacter cloacae Complex in Southern China.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0216022}, pmid = {36350178}, issn = {2165-0497}, mesh = {Humans ; Multilocus Sequence Typing ; *Anti-Bacterial Agents/pharmacology ; Enterobacter cloacae ; Retrospective Studies ; Drug Resistance, Bacterial/genetics ; *Enterobacteriaceae Infections/epidemiology/microbiology ; beta-Lactamases/genetics ; Bacterial Proteins/genetics ; China/epidemiology ; Microbial Sensitivity Tests ; Plasmids ; }, abstract = {Members of the Enterobacter cloacae complex (ECC) are important opportunistic nosocomial pathogens that are associated with a great variety of infections. Due to limited data on the genome-based classification of species and investigation of resistance mechanisms, in this work, we collected 172 clinical ECC isolates between 2019 and 2020 from three hospitals in Zhejiang, China and performed a retrospective whole-genome sequencing to analyze their population structure and drug resistance mechanisms. Of the 172 ECC isolates, 160 belonged to 9 classified species, and 12 belonged to unclassified species based on ANI analysis. Most isolates belonged to E. hormaechei (45.14%) followed by E. kobei (13.71%), which contained 126 STs, including 62 novel STs, as determined by multilocus sequence typing (MLST) analysis. 
Pan-genome analysis of the two ECC species showed that they have an "open" tendency, which indicated that their Pan-genome increased considerably with the addition of new genomes. A total of 80 resistance genes associated with 11 antimicrobial agent categories were identified in the genomes of all the isolates. The most prevailing resistance genes (12/29, 41.38%) were related to β-lactams followed by aminoglycosides. A total of 247 β-lactamase genes were identified, of which the blaACT genes were the most dominant (145/247, 58.70%), followed by the blaTEM genes (21/247, 8.50%). The inherent ACT type β-lactamase genes differed among different species. blaACT-2 and blaACT-3 were only present in E. asburiae, while blaACT-9, blaACT-12, and blaACT-6 exclusively appeared in E. kobei, E. ludwigii, and E. mori. Among the six carbapenemase-encoding genes (blaNDM-1, blaNDM-5, blaIMP-1, blaIMP-4, blaIMP-26, and blaKPC-2) identified, two (blaNDM-1 and blaIMP-1) were identified in an ST78 E. hormaechei isolate. Comparative genomic analysis of the carbapenemase gene-related sequences was performed, and the corresponding genetic structure of these resistance genes was analyzed. Genome-wide molecular characterization of the ECC population and resistance mechanism would offer valuable insights into the effective management of ECC infection in clinical settings. IMPORTANCE The presence and emergence of multiple species/subspecies of ECC have led to diversity and complications at the taxonomic level, which impedes our further understanding of the epidemiology and clinical significance of species/subspecies of ECC. Accurate identification of ECC species is extremely important. Also, it is of great importance to study the carbapenem-resistant genes in ECC and to further understand the mechanism of horizontal transfer of the resistance genes by analyzing the surrounding environment around the genes. 
The occurrence of ECC carrying two MBL genes also indicates that the selection pressure of bacteria is further increased, suggesting that we need to pay special attention to the emergence of such bacteria in the clinic.}, } @article {pmid36344558, year = {2022}, author = {Otani, H and Udwary, DW and Mouncey, NJ}, title = {Comparative and pangenomic analysis of the genus Streptomyces.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {18909}, pmid = {36344558}, issn = {2045-2322}, support = {DE-AC02-05CH11231//U.S. Department of Energy/ ; }, mesh = {*Streptomyces/metabolism ; Biosynthetic Pathways/genetics ; Secondary Metabolism/genetics ; Sequence Analysis, DNA ; *Polyketides/metabolism ; Multigene Family ; }, abstract = {Streptomycetes are highly metabolically gifted bacteria with the abilities to produce bioproducts that have profound economic and societal importance. These bioproducts are produced by metabolic pathways including those for the biosynthesis of secondary metabolites and catabolism of plant biomass constituents. Advancements in genome sequencing technologies have revealed a wealth of untapped metabolic potential from Streptomyces genomes. Here, we report the largest Streptomyces pangenome generated by using 205 complete genomes. Metabolic potentials of the pangenome and individual genomes were analyzed, revealing degrees of conservation of individual metabolic pathways and strains potentially suitable for metabolic engineering. Of them, Streptomyces bingchenggensis was identified as a potent degrader of plant biomass. Polyketide, non-ribosomal peptide, and gamma-butyrolactone biosynthetic enzymes are primarily strain specific while ectoine and some terpene biosynthetic pathways are highly conserved. A large number of transcription factors associated with secondary metabolism are strain-specific while those controlling basic biological processes are highly conserved. 
Although the majority of genes involved in morphological development are highly conserved, there are strain-specific varieties which may contribute to fine tuning the timing of cellular differentiation. Overall, these results provide insights into the metabolic potential, regulation and physiology of streptomycetes, which will facilitate further exploitation of these important bacteria.}, } @article {pmid36340844, year = {2022}, author = {Lynch, T and Nandi, T and Jayaprakash, T and Gregson, D and Church, DL}, title = {Genomic analysis of group A Streptococcus isolated during a correctional facility outbreak of MRSA in 2004.}, journal = {Journal of the Association of Medical Microbiology and Infectious Disease Canada = Journal officiel de l'Association pour la microbiologie medicale et l'infectiologie Canada}, volume = {7}, number = {1}, pages = {23-35}, pmid = {36340844}, issn = {2371-0888}, abstract = {BACKGROUND: In 2004-2005, an outbreak of impetigo occurred at a correctional facility during a sentinel outbreak of methicillin-resistant Staphylococcus aureus (MRSA) in Alberta, Canada. Next-generation sequencing (NGS) was used to characterize the group A Streptococcus (GAS) isolates and evaluate whether genomic biomarkers could distinguish between those recovered alone and those co-isolated with S. aureus.

METHODS: Superficial wound swabs collected from all adults with impetigo during this outbreak were cultured using standard methods. NGS was used to characterize and compare all of the GAS and S. aureus genomes.

RESULTS: Fifty-three adults were culture positive for GAS, with a subset of specimens also positive for MRSA (n = 5) or methicillin-sensitive S. aureus (n = 3). Seventeen additional MRSA isolates from this facility from the same time frame (no GAS co-isolates) were also included. All 78 bacterial genomes were analyzed for the presence of known virulence factors, plasmids, and antimicrobial resistance (AMR) genes. Among the GAS isolates were 12 emm types, the most common being 41.2 (n = 27; 51%). GAS genomes were phylogenetically compared with local and public datasets of invasive and non-invasive isolates. GAS genomes had diverse profiles for virulence factors, plasmids, and AMR genes. Pangenome analysis did not identify horizontally transferred genes in the co-infection versus single infections.

CONCLUSIONS: GAS recovered from invasive and non-invasive sources were not genetically distinguishable. Virulence factors, plasmids, and AMR profiles grouped by emm type, and no genetic changes were identified that predict co-infection or horizontal gene transfer between GAS and S. aureus.}, } @article {pmid36336469, year = {2022}, author = {Weigert, S and Perez-Garcia, P and Gisdon, FJ and Gagsteiger, A and Schweinshaut, K and Ullmann, GM and Chow, J and Streit, WR and Höcker, B}, title = {Investigation of the halophilic PET hydrolase PET6 from Vibrio gazogenes.}, journal = {Protein science : a publication of the Protein Society}, volume = {31}, number = {12}, pages = {e4500}, pmid = {36336469}, issn = {1469-896X}, mesh = {Humans ; *Hydrolases/chemistry ; Plastics ; Microplastics ; *Vibrio/genetics ; }, abstract = {The handling of plastic waste and the associated ubiquitous occurrence of microplastic poses one of the biggest challenges of our time. Recent investigations of plastic degrading enzymes have opened new prospects for biological microplastic decomposition as well as recycling applications. For polyethylene terephthalate, in particular, several natural and engineered enzymes are known to have such promising properties. From a previous study that identified new PETase candidates by homology search, we chose the candidate PET6 from the globally distributed, halophilic organism Vibrio gazogenes for further investigation. By mapping the occurrence of Vibrios containing PET6 homologs we demonstrated their ubiquitous prevalence in the pangenome of several Vibrio strains. The biochemical characterization of PET6 showed that PET6 has a comparatively lower activity than other enzymes but also revealed a superior turnover at very high salt concentrations. The crystal structure of PET6 provides structural insights into this adaptation to saline environments. 
By grafting only a few beneficial mutations from other PET degrading enzymes onto PET6, we increased the activity up to three-fold, demonstrating the evolutionary potential of the enzyme. MD simulations of the variant helped rationalize the mutational effects of those mutants and elucidate the interaction of the enzyme with a PET substrate. With tremendous amounts of plastic waste in the Ocean and the prevalence of Vibrio gazogenes in marine biofilms and estuarine marshes, our findings suggest that Vibrio and the PET6 enzyme are worthy subjects to study the PET degradation in marine environments.}, } @article {pmid36333324, year = {2022}, author = {Luo, X and Kang, X and Schönhuth, A}, title = {VeChat: correcting errors in long reads using variation graphs.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {6657}, pmid = {36333324}, issn = {2041-1723}, mesh = {Sequence Analysis, DNA/methods ; *Algorithms ; *Nanopores ; Haplotypes ; Data Analysis ; High-Throughput Nucleotide Sequencing ; Software ; }, abstract = {Error correction is the canonical first step in long-read sequencing data analysis. Current self-correction methods, however, are affected by consensus sequence induced biases that mask true variants in haplotypes of lower frequency showing in mixed samples. Unlike consensus sequence templates, graph-based reference systems are not affected by such biases, so do not mistakenly mask true variants as errors. We present VeChat, as an approach to implement this idea: VeChat is based on variation graphs, as a popular type of data structure for pangenome reference systems. Extensive benchmarking experiments demonstrate that long reads corrected by VeChat contain 4 to 15 (Pacific Biosciences) and 1 to 10 times (Oxford Nanopore Technologies) less errors than when being corrected by state of the art approaches. Further, using VeChat prior to long-read assembly significantly improves the haplotype awareness of the assemblies. 
VeChat is an easy-to-use open-source tool and publicly available at https://github.com/HaploKit/vechat .}, } @article {pmid36330071, year = {2022}, author = {Alsowayeh, N and Albutti, A}, title = {Designing a novel chimeric multi-epitope vaccine against Burkholderia pseudomallei, a causative agent of melioidosis.}, journal = {Frontiers in medicine}, volume = {9}, number = {}, pages = {945938}, pmid = {36330071}, issn = {2296-858X}, abstract = {Burkholderia pseudomallei, a gram-negative soil-dwelling bacterium, is primarily considered a causative agent of melioidosis infection in both animals and humans. Despite the severity of the disease, there is currently no licensed vaccine on the market. The development of an effective vaccine against B. pseudomallei could help prevent the spread of infection. The purpose of this study was to develop a multi-epitope-based vaccine against B. pseudomallei using advanced bacterial pan-genome analysis. A total of four proteins were prioritized for epitope prediction by using multiple subtractive proteomics filters. Following that, a multi-epitopes based chimeric vaccine construct was modeled and joined with an adjuvant to improve the potency of the designed vaccine construct. The structure of the construct was predicted and analyzed for flexibility. A population coverage analysis was performed to evaluate the broad-spectrum applicability of B. pseudomallei. The computed combined world population coverage was 99.74%. Molecular docking analysis was applied further to evaluate the binding efficacy of the designed vaccine construct with the human toll-like receptors-5 (TLR-5). Furthermore, the dynamic behavior and stability of the docked complexes were investigated using molecular dynamics simulation, and the binding free energy determined for Vaccine-TLR-5 was delta total -168.3588. The docking result revealed that the vaccine construct may elicit a suitable immunological response within the host body. 
Hence, we believe that the designed in-silico vaccine could be helpful for experimentalists in the formulation of a highly effective vaccine for B. pseudomallei.}, } @article {pmid36326919, year = {2022}, author = {Amulyasai, B and Anusha, R and Sasikala, C and Ramana, CV}, title = {Phylogenomic analysis of a metagenome-assembled genome indicates a new taxon of an anoxygenic phototroph bacterium in the family Chromatiaceae and the proposal of "Candidatus Thioaporhodococcus" gen. nov.}, journal = {Archives of microbiology}, volume = {204}, number = {12}, pages = {688}, pmid = {36326919}, issn = {1432-072X}, mesh = {Phylogeny ; *Metagenome ; RNA, Ribosomal, 16S/genetics ; DNA, Bacterial/genetics ; Sequence Analysis, DNA ; *Chromatiaceae ; Bacterial Typing Techniques ; Fatty Acids/analysis ; }, abstract = {In this study, three metagenome-assembled genomes of a sediment sample were constructed. A Bin1 (JB001) genome was identified as a photo-litho-auto/heterotroph (purple sulfur bacteria) bacterium with the ability to fix nitrogen, tolerate salt, and to produce bacteriochlorophyll a. It has a genome length of 4.1 Mb and a G + C content of 64.9%. Phylogenetic studies based on concatenated 92 core genes and photosynthetic genes (pufLM and bchY) showed that Bin JB001 is related to Thiococcus pfennigii, "Thioflavicoccus mobilis" and to the Lamprocystis purpurea lineage. Bin JB001 and its closely related members were subjected to the genome-based study of phenotypic and phylogenomic analysis. Genomic similarity indices (dDDH and ANI) showed that Bin JB001 could be defined as a novel species. The average amino acid identity (AAI) and percentage of conserved proteins (POCP) values were below 60 and 50%, respectively. The pan-genome analysis indicated that the pan-genome was an open type wherein Bin JB001 had 855 core genes. 
This study shows that the binned genome, Bin JB001 could represent a novel species of a new genus under the family Chromatiaceae, for which the name "Candidatus Thioaporhodococcus sediminis" gen. nov. sp. nov. is proposed.}, } @article {pmid36326658, year = {2022}, author = {Kittiwan, N and Calland, JK and Mourkas, E and Hitchings, MD and Murray, S and Tadee, P and Tadee, P and Duangsonk, K and Meric, G and Sheppard, SK and Patchanee, P and Pascoe, B}, title = {Genetic diversity and variation in antimicrobial-resistance determinants of non-serotype 2 Streptococcus suis isolates from healthy pigs.}, journal = {Microbial genomics}, volume = {8}, number = {11}, pages = {}, pmid = {36326658}, issn = {2057-5858}, support = {MR/T030062/1//Medical Research Council/United Kingdom ; MR/L015080/1//Medical Research Council/United Kingdom ; }, mesh = {Swine ; Animals ; *Streptococcus suis/genetics ; *Streptococcal Infections/veterinary/genetics ; Anti-Bacterial Agents/pharmacology ; Genetic Variation ; }, abstract = {Streptococcus suis is a leading cause of bacterial meningitis in South-East Asia, with frequent zoonotic transfer to humans associated with close contact with pigs. A small number of invasive lineages are responsible for endemic infection in the swine industry, causing considerable global economic losses. A lack of surveillance and a rising trend in clinical treatment failure has raised concerns of growing antimicrobial resistance (AMR) among invasive S. suis. Gene flow between healthy and disease isolates is poorly understood and, in this study, we sample and sequence a collection of isolates predominantly from healthy pigs in Chiang Mai province, Northern Thailand. Pangenome characterization identified extensive genetic diversity and frequent AMR carriage in isolates from healthy pigs. Multiple AMR genes were identified, conferring resistance to aminoglycosides, lincosamides, tetracycline and macrolides. 
All isolates were non-susceptible to three or more different antimicrobial classes, and 75 % of non-serotype 2 isolates were non-susceptible to six or more classes (compared to 37.5 % of serotype 2 isolates). AMR genes were found on integrative and conjugative elements previously observed in other species, suggesting a mobile gene pool that can be accessed by invasive disease isolates. This article contains data hosted by Microreact.}, } @article {pmid36324059, year = {2023}, author = {Amas, JC and Thomas, WJW and Zhang, Y and Edwards, D and Batley, J}, title = {Key Advances in the New Era of Genomics-Assisted Disease Resistance Improvement of Brassica Species.}, journal = {Phytopathology}, volume = {113}, number = {5}, pages = {771-785}, doi = {10.1094/PHYTO-08-22-0289-FI}, pmid = {36324059}, issn = {0031-949X}, mesh = {*Brassica/genetics ; Disease Resistance/genetics ; Genome, Plant/genetics ; Plant Diseases/genetics ; Plant Breeding ; Genomics ; }, abstract = {Disease resistance improvement remains a major focus in breeding programs as diseases continue to devastate Brassica production systems due to intensive cultivation and climate change. Genomics has paved the way to understand the complex genomes of Brassicas, which has been pivotal in the dissection of the genetic underpinnings of agronomic traits driving the development of superior cultivars. The new era of genomics-assisted disease resistance breeding has been marked by the development of high-quality genome references, accelerating the identification of disease resistance genes controlling both qualitative (major) gene and quantitative resistance. This facilitates the development of molecular markers for marker assisted selection and enables genome editing approaches for targeted gene manipulation to enhance the genetic value of disease resistance traits. 
This review summarizes the key advances in the development of genomic resources for Brassica species, focusing on improved genome references, based on long-read sequencing technologies and pangenome assemblies. This is further supported by the advances in pathogen genomics, which have resulted in the discovery of pathogenicity factors, complementing the mining of disease resistance genes in the host. Recognizing the co-evolutionary arms race between the host and pathogen, it is critical to identify novel resistance genes using crop wild relatives and synthetic cultivars or through genetic manipulation via genome-editing to sustain the development of superior cultivars. Integrating these key advances with new breeding techniques and improved phenotyping using advanced data analysis platforms will make disease resistance improvement in Brassica species more efficient and responsive to current and future demands.}, } @article {pmid36322504, year = {2022}, author = {Maynard-Smith, L and Derrick, JP and Borrow, R and Lucidarme, J and Maiden, MCJ and Heyderman, RS and Harrison, OB}, title = {Genome-Wide Association Studies Identify an Association of Transferrin Binding Protein B Variation and Invasive Serogroup Y Meningococcal Disease in Older Adults.}, journal = {The Journal of infectious diseases}, volume = {226}, number = {12}, pages = {2204-2214}, pmid = {36322504}, issn = {1537-6613}, support = {ACF-2015-18-029/DH_/Department of Health/United Kingdom ; 218205/Z/19/Z/WT_/Wellcome Trust/United Kingdom ; PR-OD-101720007/DH_/Department of Health/United Kingdom ; 214374/Z/18/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Humans ; Aged ; Neisseria meningitidis, Serogroup Y/genetics ; Transferrin-Binding Protein B/genetics ; Genome-Wide Association Study ; Serogroup ; Phylogeny ; *Meningococcal Infections/genetics/microbiology ; *Neisseria meningitidis ; Iron ; *Meningococcal Vaccines ; }, abstract = {BACKGROUND: Neisseria meningitidis serogroup Y, especially ST-23 
clonal complex (Y:cc23), represents a larger proportion of invasive meningococcal disease (IMD) in older adults compared to younger individuals. This study explored the meningococcal genetic variation underlying this association.

METHODS: Maximum-likelihood phylogenies and the pangenome were analyzed using whole-genome sequence (WGS) data from 200 Y:cc23 isolates in the Neisseria PubMLST database. Genome-wide association studies (GWAS) were performed on WGS data from 250 Y:cc23 isolates from individuals with IMD aged ≥65 years versus <65 years.

RESULTS: Y:cc23 meningococcal variants did not cluster by age group or disease phenotype in phylogenetic analyses. Pangenome comparisons found no differences in presence or absence of genes in IMD isolates from the different age groups. GWAS identified differences in nucleotide polymorphisms within the transferrin-binding protein B (tbpB) gene in isolates from individuals ≥65 years of age. TbpB structure modelling suggests these may impact binding of human transferrin.

CONCLUSIONS: These data suggest differential iron scavenging capacity amongst Y:cc23 meningococci isolated from older compared to younger patients. Iron acquisition is essential for many bacterial pathogens including the meningococcus. These polymorphisms may facilitate colonization, thereby increasing the risk of disease in vulnerable older people with altered nasopharyngeal microbiomes and nutritional status.}, } @article {pmid36318249, year = {2023}, author = {Martin, FJ and Amode, MR and Aneja, A and Austine-Orimoloye, O and Azov, AG and Barnes, I and Becker, A and Bennett, R and Berry, A and Bhai, J and Bhurji, SK and Bignell, A and Boddu, S and Branco Lins, PR and Brooks, L and Ramaraju, SB and Charkhchi, M and Cockburn, A and Da Rin Fiorretto, L and Davidson, C and Dodiya, K and Donaldson, S and El Houdaigui, B and El Naboulsi, T and Fatima, R and Giron, CG and Genez, T and Ghattaoraya, GS and Martinez, JG and Guijarro, C and Hardy, M and Hollis, Z and Hourlier, T and Hunt, T and Kay, M and Kaykala, V and Le, T and Lemos, D and Marques-Coelho, D and Marugán, JC and Merino, GA and Mirabueno, LP and Mushtaq, A and Hossain, SN and Ogeh, DN and Sakthivel, MP and Parker, A and Perry, M and Piližota, I and Prosovetskaia, I and Pérez-Silva, JG and Salam, AIA and Saraiva-Agostinho, N and Schuilenburg, H and Sheppard, D and Sinha, S and Sipos, B and Stark, W and Steed, E and Sukumaran, R and Sumathipala, D and Suner, MM and Surapaneni, L and Sutinen, K and Szpak, M and Tricomi, FF and Urbina-Gómez, D and Veidenberg, A and Walsh, TA and Walts, B and Wass, E and Willhoft, N and Allen, J and Alvarez-Jarreta, J and Chakiachvili, M and Flint, B and Giorgetti, S and Haggerty, L and Ilsley, GR and Loveland, JE and Moore, B and Mudge, JM and Tate, J and Thybert, D and Trevanion, SJ and Winterbottom, A and Frankish, A and Hunt, SE and Ruffier, M and Cunningham, F and Dyer, S and Finn, RD and Howe, KL and Harrison, PW and Yates, AD and Flicek, P}, title = {Ensembl 2023.}, 
journal = {Nucleic acids research}, volume = {51}, number = {D1}, pages = {D933-D941}, pmid = {36318249}, issn = {1362-4962}, support = {U41 HG010972/HG/NHGRI NIH HHS/United States ; U41 HG007234/HG/NHGRI NIH HHS/United States ; U24 HG007234/HG/NHGRI NIH HHS/United States ; R01 HG010485/HG/NHGRI NIH HHS/United States ; /WT_/Wellcome Trust/United Kingdom ; }, mesh = {Animals ; Humans ; *Software ; *Databases, Genetic ; Molecular Sequence Annotation ; Genomics ; Genome ; }, abstract = {Ensembl (https://www.ensembl.org) has produced high-quality genomic resources for vertebrates and model organisms for more than twenty years. During that time, our resources, services and tools have continually evolved in line with both the publicly available genome data and the downstream research and applications that utilise the Ensembl platform. In recent years we have witnessed a dramatic shift in the genomic landscape. There has been a large increase in the number of high-quality reference genomes through global biodiversity initiatives. In parallel, there have been major advances towards pangenome representations of higher species, where many alternative genome assemblies representing different breeds, cultivars, strains and haplotypes are now available. In order to support these efforts and accelerate downstream research, it is our goal at Ensembl to create high-quality annotations, tools and services for species across the tree of life. 
Here, we report our resources for popular reference genomes, the dramatic growth of our annotations (including haplotypes from the first human pangenome graphs), updates to the Ensembl Variant Effect Predictor (VEP), interactive protein structure predictions from AlphaFold DB, and the beta release of our new website.}, } @article {pmid36318042, year = {2022}, author = {Liu, N and Liu, D and Li, K and Hu, S and He, Z}, title = {Pan-Genome Analysis of Staphylococcus aureus Reveals Key Factors Influencing Genomic Plasticity.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0311722}, pmid = {36318042}, issn = {2165-0497}, mesh = {Humans ; *Staphylococcus aureus ; Multilocus Sequence Typing/methods ; Phylogeny ; *Staphylococcal Infections/microbiology ; Genome, Bacterial ; Genomics ; }, abstract = {The massive quantities of bacterial genomic data being generated have facilitated in-depth analyses of bacteria for pan-genomic studies. However, the pan-genome compositions of one species differed significantly between different studies, so we used Staphylococcus aureus as a model organism to explore the influences driving bacterial pan-genome composition. We selected a series of diverse strains for pan-genomic analysis to explore the pan-genomic composition of S. aureus at the species level and the actual contribution of influencing factors (sequence type [ST], source of isolation, country of isolation, and date of collection) to pan-genome composition. We found that the distribution of core genes in bacterial populations restrained under different conditions differed significantly and showed "local core gene regions" in the same ST. Therefore, we propose that ST may be a key factor driving the dynamic distribution of bacterial genomes and that phylogenetic analyses using whole-genome alignment are no longer appropriate in populations containing multiple ST strains. 
Pan-genomic analysis showed that some of the housekeeping genes of multilocus sequence typing (MLST) are carried at less than 60% in S. aureus strains. Consequently, we propose a new set of marker genes for the classification of S. aureus, which provides a reference for finding a new set of housekeeping genes to apply to MLST. In this study, we explored the role of driving factors influencing pan-genome composition, providing new insights into the study of bacterial pan-genomes. IMPORTANCE We sought to explore the impact of driving factors influencing pan-genome composition using Staphylococcus aureus as a model organism to provide new insights for the study of bacterial pan-genomes. We believe that the sequence type (ST) of the strains under consideration plays a significant role in the dynamic distribution of bacterial genes. Our findings indicate that there are a certain number of essential genes in Staphylococcus aureus; however, the number of core genes is not as high as previously thought. The new classification method proposed herein suggests that a new set of housekeeping genes more suitable for Staphylococcus aureus must be identified to improve the current classification status of this species.}, } @article {pmid36317888, year = {2022}, author = {Yuan, Y and Seif, Y and Rychel, K and Yoo, R and Chauhan, S and Poudel, S and Al-Bulushi, T and Palsson, BO and Sastry, AV}, title = {Pan-Genome Analysis of Transcriptional Regulation in Six Salmonella enterica Serovar Typhimurium Strains Reveals Their Different Regulatory Structures.}, journal = {mSystems}, volume = {7}, number = {6}, pages = {e0046722}, pmid = {36317888}, issn = {2379-5077}, mesh = {Humans ; *Salmonella enterica/genetics ; Serogroup ; Salmonella typhimurium/genetics ; Gene Expression Regulation ; Gene Expression Profiling ; }, abstract = {Establishing transcriptional regulatory networks (TRNs) in bacteria has been limited to well-characterized model strains. 
Using machine learning methods, we established the transcriptional regulatory networks of six Salmonella enterica serovar Typhimurium strains from their transcriptomes. By decomposing a compendia of RNA sequencing (RNA-seq) data with independent component analysis, we obtained 400 independently modulated sets of genes, called iModulons. We (i) performed pan-genome analysis of the phylogroup structure of S. Typhimurium and analyzed the iModulons against this background, (ii) revealed different genetic signatures in pathogenicity islands that explained phenotypes, (iii) discovered three transport iModulons linked to antibiotic resistance, (iv) described concerted responses to cationic antimicrobial peptides, and (v) uncovered new regulons. Thus, by combining pan-genome and transcriptomic analytics, we revealed variations in TRNs across six strains of serovar Typhimurium. IMPORTANCE Salmonella enterica serovar Typhimurium is a pathogen involved in human nontyphoidal infections. Treating S. Typhimurium infections is difficult due to the species's dynamic adaptation to its environment, which is dictated by a complex transcriptional regulatory network (TRN) that is different across strains. In this study, we describe the use of independent component analysis to characterize the differential TRNs across the S. Typhimurium pan-genome using a compendium of high-quality RNA-seq data. This approach provided unprecedented insights into the differences between regulation of key cellular functions and pathogenicity in the different strains. 
The study provides an impetus to initiate a large-scale effort to reveal the TRN differences between the major phylogroups of the pathogenic bacteria, which could fundamentally impact personalizing treatments of bacterial pathogens.}, } @article {pmid36314968, year = {2022}, author = {Hur, JI and Kim, J and Ryu, S and Jeon, B}, title = {Phylogenetic Association and Genetic Factors in Cold Stress Tolerance in Campylobacter jejuni.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0268122}, pmid = {36314968}, issn = {2165-0497}, mesh = {Animals ; Humans ; *Campylobacter jejuni/genetics ; Phylogeny ; Multilocus Sequence Typing ; Cold-Shock Response/genetics ; Cold Temperature ; Chickens ; *Campylobacter Infections ; }, abstract = {Campylobacter jejuni is a major foodborne pathogen transmitted to humans primarily via contaminated poultry meat. Since poultry meat is generally processed, distributed, and stored in the cold chain, the survival of C. jejuni at refrigeration temperatures crucially affects human exposure to C. jejuni. Here, we investigated genetic factors associated with cold stress tolerance in C. jejuni. Seventy-nine C. jejuni strains isolated from retail raw chicken exhibited different survival levels at 4°C for 21 days. Multilocus sequence typing (MLST) clonal complex 21 (CC-21) and CC-443 were dominant among cold stress-tolerant strains, whereas CC-45 was common among cold stress-sensitive strains. Genome-wide average nucleotide identity (ANI) analysis identified a phylogenetic cluster associated with cold stress tolerance. Moreover, a pangenome analysis revealed 58 genes distinctively present in the cold stress-tolerant phylogenetic cluster. Among these 58 genes, cfrA, encoding the ferric enterobactin receptor involved in ion transport and metabolism, was selected for further analysis. 
Remarkably, the viability of a ΔcfrA mutant at 4°C was significantly decreased, while the levels of total reactive oxygen species and intracellular iron exceeded those of the wild type. Additionally, a knockout mutation of cfrA also significantly decreased the viability of three cold stress-tolerant isolates at 4°C, confirming the role of cfrA in cold stress tolerance. The results of this study demonstrate that unique phylogenetic clusters of C. jejuni associated with cold stress tolerance exist and that cfrA is a genetic factor contributing to cold stress tolerance in C. jejuni. IMPORTANCE The tolerance of foodborne pathogens to environmental stresses significantly affects food safety. Several studies have demonstrated that C. jejuni survives extended exposures to low temperatures, but the mechanisms of cold stress tolerance are not fully understood. Here, we demonstrate that C. jejuni strains in certain phylogenetic groups exhibit increased tolerance to cold stress. Notably, cfrA is present in the phylogenetic cluster associated with cold stress tolerance and plays a role in the survival of C. jejuni at low temperatures by alleviating oxidative stress. This is the first study to discover phylogenetic associations involving cold stress tolerance and to identify genetic elements conferring cold stress tolerance to C. jejuni.}, } @article {pmid36307757, year = {2022}, author = {Chen, Y and Miao, Y and Bai, W and Lin, K and Pang, E}, title = {Characteristics and potential functional effects of long insertions in Asian butternuts.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {732}, pmid = {36307757}, issn = {1471-2164}, support = {31571361//the National Natural Science Foundation of China/ ; }, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Genome ; *Asian People ; }, abstract = {BACKGROUND: Structural variants (SVs) play important roles in adaptation evolution and species diversification. 
Especially, in plants, many phenotypes of response to the environment were found to be associated with SVs. Despite the prevalence and significance of SVs, long insertions remain poorly detected and studied in all but model species.

RESULTS: We used whole-genome resequencing of paired reads from 80 Asian butternuts to detect long insertions and further analyse their characteristics and potential functional effects. By combining of mapping-based and de novo assembly-based methods, we obtained a multiple related species pangenome representing higher taxonomic groups. We obtained 89,312 distinct contigs totaling 147,773,999 base pair (bp) of new sequences, of which 347 were putative long insertions placed in the reference genome. Most of the putative long insertions appeared in multiple species; in contrast, only 62 putative long insertions appeared in one species, which may be involved in the response to the environment. 65 putative long insertions fell into 61 distinct protein-coding genes involved in plant development, and 105 putative long insertions fell into upstream of 106 distinct protein-coding genes involved in cellular respiration. 3,367 genes were annotated in 2,606 contigs. We propose PLAINS (https://github.com/CMB-BNU/PLAINS.git), a streamlined, comprehensive pipeline for the prediction and analysis of long insertions using whole-genome resequencing.

CONCLUSIONS: Our study lays down an important foundation for further whole-genome long insertion studies, allowing the investigation of their effects by experiments.}, } @article {pmid36303546, year = {2022}, author = {Zia, K and Rao, MJ and Sadaqat, M and Azeem, F and Fatima, K and Tahir Ul Qamar, M and Alshammari, A and Alharbi, M}, title = {Pangenome-wide analysis of cyclic nucleotide-gated channel (CNGC) gene family in citrus Spp. Revealed their intraspecies diversity and potential roles in abiotic stress tolerance.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {1034921}, pmid = {36303546}, issn = {1664-8021}, abstract = {Cyclic nucleotide-gated channels (CNGC) gene family has been found to be involved in physiological processes including signaling pathways, environmental stresses, plant growth, and development. This gene family of non-selective cation channels is known to regulate the uptake of calcium and is reported in several plant species. The pangenome-wide studies enable researchers to understand the genetic diversity comprehensively; as a comparative analysis of multiple plant species or member of a species at once helps to better understand the evolutionary relationships and diversity present among them. In the current study, pangenome-wide analysis of the CNGC gene family has been performed on five Citrus species. As a result, a total of 32 genes in Citrus sinensis, 27 genes in Citrus recticulata, 30 genes in Citrus grandis, 31 genes in Atalantia buxfolia, and 30 genes in Poncirus trifoliata were identified. In addition, two unique genes CNGC13 and CNGC14 were identified, which may have potential roles. All the identified CNGC genes were unevenly distributed on 9 chromosomes except P. trifoliata had genes distributed on 7 chromosomes and were classified into four major groups and two sub-groups namely I, II, III, IV-A, and IV-B. 
Cyclic nucleotide binding (CNB) motif, calmodulin-binding motif (CaMB), and motif for IQ-domain were conserved in Citrus Spp. Intron exon structures of citrus species were not exactly as same as the gene structures of Arabidopsis. The majority of cis-regulatory elements (CREs) were light responsive and others include growth, development, and stress-related indicating potential roles of the CNGC gene family in these functions. Both segmental and tandem duplication were involved in the expansion of the CNGC gene family in Citrus Spp. The miRNAs are involved in the response of CsCNGC genes towards drought stress along with having regulatory association in the expression of these genes. Protein-Protein interaction (PPI) analysis also showed the interaction of CNGC proteins with other CNGCs which suggested their potential role in pathways regulating different biological processes. GO enrichment revealed that CNGC genes were involved in the transport of ions across membranes. Furthermore, tissue-specific expression patterns of leaves sample of C. sinensis were studied under drought stress. Out of 32 genes of C. sinensis 3 genes i.e., CsCNGC1.4, CsCNGC2.1, and CsCNGC4.2 were highly up-regulated, and only CsCNGC4.6 was highly down-regulated. The qRT-PCR analysis also showed that CNGC genes were highly expressed after treatment with drought stress, while gene expression was lower under controlled conditions. This work includes findings based on multiple genomes instead of one, therefore, this will provide more genomic information rather than single genome-based studies. 
These findings will serve as a basis for further functional insights into the CNGC gene family.}, } @article {pmid36303348, year = {2023}, author = {Zhang, J and Xu, J and Lei, H and Liang, H and Li, X and Li, B}, title = {The development of variation-based rifampicin resistance in Staphylococcus aureus deciphered through genomic and transcriptomic study.}, journal = {Journal of hazardous materials}, volume = {442}, number = {}, pages = {130112}, doi = {10.1016/j.jhazmat.2022.130112}, pmid = {36303348}, issn = {1873-3336}, mesh = {*Rifampin/pharmacology ; *Staphylococcus aureus/genetics ; Drug Resistance, Bacterial/genetics ; Transcriptome ; DNA-Directed RNA Polymerases/genetics/pharmacology ; Microbial Sensitivity Tests ; Anti-Bacterial Agents/pharmacology ; Mutation ; Genomics ; Bacterial Proteins/genetics ; }, abstract = {Rifampicin (RIF) resistance imposes a challenge on the antimicrobial treatment of pathogen infections. Figuring out the development mechanism of RIF resistance is critical to improving antimicrobial therapy strategy in clinics and biological treatment strategy of RIF polluted sewage in environmental engineering. The RIF resistance development of Staphylococcus aureus (S. aureus) with exposure to RIF at sub-inhibitory concentrations was comprehensively investigated via genomic and transcriptomic approaches in this study. RIF minimal inhibitory concentration (MIC) for S. aureus rapidly increased from 0.032 to 256 mg/L. Membrane permeability decrease, biofilm formation enhancement, and ROS production increase associated with RIF resistance were observed in RIF-induced strains. Through comparative genomic analysis, mutations in rpoB and rpoC were considered to be associated with RIF resistance in S. aureus mutants. Pan-genome-wide single-nucleotide variant analysis indicated that mutations at rpoB-1412, rpoB-1451, and rpoB-1457 were prevalent in 13849 public genomes of S. 
aureus, while mutations at rpoB-2256, and rpoC-3092 were first discovered in this study. The panorama of adaptative alteration of cellular physiological processes was observed via transcriptomic analysis. The oxidation pressure responses, metabolism, transporters, virulence factors, and multiple steps of DNA and RNA machinery were found to be perturbed by RIF in S. aureus.}, } @article {pmid36301610, year = {2022}, author = {Leigh, RJ and McKenna, C and McWade, R and Lynch, B and Walsh, F}, title = {Comparative genomics and pangenomics of vancomycin-resistant and susceptible Enterococcus faecium from Irish hospitals.}, journal = {Journal of medical microbiology}, volume = {71}, number = {10}, pages = {}, doi = {10.1099/jmm.0.001590}, pmid = {36301610}, issn = {1473-5644}, mesh = {Humans ; *Enterococcus faecium/genetics ; Vancomycin Resistance/genetics ; Vancomycin/pharmacology ; *Gram-Positive Bacterial Infections/epidemiology ; Hospitals ; Genomics ; Anti-Bacterial Agents/pharmacology ; *Vancomycin-Resistant Enterococci/genetics ; Bacterial Proteins/genetics ; }, abstract = {Introduction. Enterococcus faecium has emerged as an important nosocomial pathogen, which is increasingly difficult to treat due to the genetic acquisition of vancomycin resistance. Ireland has a recalcitrant vancomycin-resistant bloodstream infection rate compared to other developed countries. Hypothesis/Gap statement. Vancomycin resistance rates persist amongst E. faecium isolates from Irish hospitals. The evolutionary genomics governing these trends have not been fully elucidated. Methodology. A set of 28 vancomycin-resistant isolates was sequenced to construct a dataset alongside 61 other publicly available Irish genomes. This dataset was extensively analysed using in silico methodologies (comparative genomics, pangenomics, phylogenetics, genotypics and comparative functional analyses) to uncover distinct evolutionary, coevolutionary and clinically relevant population trends. Results. 
These results suggest that a stable (in terms of genome size, GC% and number of genes), yet genetically diverse population (in terms of gene content) of E. faecium persists in Ireland with acquired resistance arising via plasmid acquisition (vanA) or, to a lesser extent, chromosomal recombination (vanB). Population analysis revealed five clusters with one cluster partitioned into four clades which transcend isolation dates. Pangenomic and recombination analyses revealed an open (whole genome and chromosomal specific) pangenome illustrating a rampant evolutionary pattern. Comparative resistomics and virulomics uncovered distinct chromosomal and mobilomal propensity for multidrug resistance, widespread chromosomal point-mutation-mediated resistance and chromosomally harboured arsenals of virulence factors. Interestingly, a potential difference in biofilm formation strategies was highlighted by coevolutionary analysis, suggesting differential biofilm genotypes between vanA and vanB isolates. Conclusions. These results highlight the evolutionary history of Irish E. faecium isolates and may provide insight into underlying infection dynamics in a clinical setting. Due to the apparent ease of vancomycin resistance acquisition over time, susceptible E. 
faecium should be concurrently reduced in Irish hospitals to mitigate potential resistant infections.}, } @article {pmid36298594, year = {2022}, author = {Nawaz, M and Ullah, A and Al-Harbi, AI and Haq, MU and Hameed, AR and Ahmad, S and Aziz, A and Raziq, K and Khan, S and Irfan, M and Muhammad, R}, title = {Genome-Based Multi-Antigenic Epitopes Vaccine Construct Designing against Staphylococcus hominis Using Reverse Vaccinology and Biophysical Approaches.}, journal = {Vaccines}, volume = {10}, number = {10}, pages = {}, pmid = {36298594}, issn = {2076-393X}, abstract = {Staphylococcus hominis is a Gram-positive bacterium from the staphylococcus genus; it is also a member of coagulase-negative staphylococci because of its opportunistic nature and ability to cause life-threatening bloodstream infections in immunocompromised patients. Gram-positive and opportunistic bacteria have become a major concern for the medical community. It has also drawn the attention of scientists due to the evaluation of immune evasion tactics and the development of multidrug-resistant strains. This prompted the need to explore novel therapeutic approaches as an alternative to antibiotics. The current study aimed to develop a broad-spectrum, multi-epitope vaccine to control bacterial infections and reduce the burden on healthcare systems. A computational framework was designed to filter the immunogenic potent vaccine candidate. This framework consists of pan-genomics, subtractive proteomics, and immunoinformatics approaches to prioritize vaccine candidates. A total of 12,285 core proteins were obtained using a pan-genome analysis of all strains. The screening of the core proteins resulted in the selection of only two proteins for the next epitope prediction phase. Eleven B-cell derived T-cell epitopes were selected that met the criteria of different immunoinformatics approaches such as allergenicity, antigenicity, immunogenicity, and toxicity. 
A vaccine construct was formulated using EAAAK and GPGPG linkers and a cholera toxin B subunit. This formulated vaccine construct was further used for downward analysis. The vaccine was loop refined and improved for structure stability through disulfide engineering. For an efficient expression, the codons were optimized as per the usage pattern of the E coli (K12) expression system. The top three refined docked complexes of the vaccine that docked with the MHC-I, MHC-II, and TLR-4 receptors were selected, which proved the best binding potential of the vaccine with immune receptors; this was followed by molecular dynamic simulations. The results indicate the best intermolecular bonding between immune receptors and vaccine epitopes and that they are exposed to the host's immune system. Finally, the binding energies were calculated to confirm the binding stability of the docked complexes. This work aimed to provide a manageable list of immunogenic and antigenic epitopes that could be used as potent vaccine candidates for experimental in vivo and in vitro studies.}, } @article {pmid36296313, year = {2022}, author = {Liu, Y and Cui, X and Yang, R and Zhang, Y and Xu, Y and Liu, G and Zhang, B and Wang, J and Wang, X and Zhang, W and Chen, T and Zhang, G}, title = {Genomic Insights into the Radiation-Resistant Capability of Sphingomonas qomolangmaensis S5-59[T] and Sphingomonas glaciei S8-45[T], Two Novel Bacteria from the North Slope of Mount Everest.}, journal = {Microorganisms}, volume = {10}, number = {10}, pages = {}, pmid = {36296313}, issn = {2076-2607}, abstract = {Mount Everest provides natural advantages to finding radiation-resistant extremophiles that are functionally mechanistic and possess commercial significance. (1) Background: Two bacterial strains, designated S5-59T and S8-45T, were isolated from moraine samples collected from the north slope of Mount Everest at altitudes of 5700m and 5100m above sea level. 
(2) Methods: The present study investigated the polyphasic features and genomic characteristics of S5-59[T] and S8-45[T]. (3) Results: The major fatty acids and the predominant respiratory menaquinone of S5-59[T] and S8-45[T] were summed as feature 3 (comprising C16:1 ω6c and/or C16:1 ω7c) and ubiquinone-10 (Q-10). Phylogenetic analyses based on 16S rRNA sequences and average nucleotide identity values among these two strains and their reference type strains were below the species demarcation thresholds of 98.65% and 95%. Strains S5-59[T] and S8-45[T] harbored great radiation resistance. The genomic analyses showed that DNA damage repair genes, such as mutL, mutS, radA, radC, recF, recN, etc., were present in the S5-59[T] and S8-45[T] strains. Additionally, strain S5-59[T] possessed more genes related to DNA protection proteins. The pan-genome analysis and horizontal gene transfers revealed that strains of Sphingomonas had a consistently homologous genetic evolutionary radiation resistance. Moreover, enzymatic antioxidative proteins also served critical roles in converting ROS into harmless molecules that resulted in resistance to radiation. Further, pigments and carotenoids such as zeaxanthin and alkylresorcinols of the non-enzymatic antioxidative system were also predicted to protect them from radiation. (4) Conclusions: Type strains S5-59[T] (=JCM 35564T =GDMCC 1.3193T) and S8-45[T] (=JCM 34749T =GDMCC 1.2715T) represent two novel species of the genus Sphingomonas with the proposed name Sphingomonas qomolangmaensis sp. nov. and Sphingomonas glaciei sp. nov. 
The type strains, S5-59[T] and S8-45[T], were assessed in a deeply genomic study of their radiation-resistant mechanisms and this thus resulted in a further understanding of their greater potential application for the development of anti-radiation protective drugs.}, } @article {pmid36290512, year = {2022}, author = {Zhang, Z and Guo, Y and Yang, F and Li, J}, title = {Pan-Genome Analysis Reveals Functional Divergences in Gut-Restricted Gilliamella and Snodgrassella.}, journal = {Bioengineering (Basel, Switzerland)}, volume = {9}, number = {10}, pages = {}, pmid = {36290512}, issn = {2306-5354}, abstract = {Gilliamella and Snodgrassella, members of core gut microbiota in corbiculate bees, have high species diversity and adaptability to a wide range of hosts. In this study, we performed species taxonomy and phylogenetic analysis for Gilliamella and Snodgrassella strains that we isolated in our laboratory, in combination with published whole-genome. Functional effects of accessory and unique genes were investigated by KEGG category and pathway annotation in pan-genome analysis. Consequently, in Gilliamella, we inferred the importance of carbohydrate metabolism, amino acid metabolism, membrane transport, energy metabolism, and metabolism of cofactors and vitamins in accessory or unique genes. The pathway mentioned above, plus infectious disease, lipid metabolism, nucleotide metabolism as well as replication and repair exert a pivotal role in accessory or unique genes of Snodgrassella. Further analysis revealed the existence of functional differentiation of accessory and unique genes among Apis-derived genomes and Bombus-derived genomes. We also identified eight and four biosynthetic gene clusters in all Gilliamella and Snodgrassella genomes, respectively. 
Our study provides a good insight to better understand how host heterogeneity influences the bacterial speciation and affects the versatility of the genome of the gut bacteria.}, } @article {pmid36288801, year = {2023}, author = {McInerney, JO}, title = {Prokaryotic Pangenomes Act as Evolving Ecosystems.}, journal = {Molecular biology and evolution}, volume = {40}, number = {1}, pages = {}, pmid = {36288801}, issn = {1537-1719}, mesh = {Phylogeny ; *Ecosystem ; *Evolution, Molecular ; Prokaryotic Cells ; Biological Evolution ; }, abstract = {Understanding adaptation to the local environment is a central tenet and a major focus of evolutionary biology. But this is only part of the adaptionist story. In addition to the external environment, one of the main drivers of genome composition is genetic background. In this perspective, I argue that there is a growing body of evidence that intra-genomic selective pressures play a significant part in the composition of prokaryotic genomes and play a significant role in the origin, maintenance and structuring of prokaryotic pangenomes.}, } @article {pmid36288260, year = {2022}, author = {Sun, X and Chen, Z and Kong, T and Chen, Z and Dong, Y and Kolton, M and Cao, Z and Zhang, X and Zhang, H and Liu, G and Gao, P and Yang, N and Lan, L and Xu, Y and Sun, W}, title = {Mycobacteriaceae Mineralizes Micropolyethylene in Riverine Ecosystems.}, journal = {Environmental science \& technology}, volume = {56}, number = {22}, pages = {15705--15717}, doi = {10.1021/acs.est.2c05346}, pmid = {36288260}, issn = {1520-5851}, mesh = {Plastics/analysis ; Ecosystem ; Environmental Monitoring ; *Water Pollutants, Chemical/analysis ; *Mycobacteriaceae ; Carbon Dioxide/analysis ; Rivers/chemistry ; }, abstract = {Microplastic (MP) contamination is a serious global environmental problem. Plastic contamination has attracted extensive attention during the past decades. 
While physiochemical weathering may influence the properties of MPs, biodegradation by microorganisms could ultimately mineralize plastics into CO2. Compared to the well-studied marine ecosystems, the MP biodegradation process in riverine ecosystems, however, is less understood. The current study focuses on the MP biodegradation in one of the world's most plastic contaminated rivers, Pearl River, using micropolyethylene (mPE) as a model substrate. Mineralization of [13]C-labeled mPE into [13]CO2 provided direct evidence of mPE biodegradation by indigenous microorganisms. Several Actinobacteriota genera were identified as putative mPE degraders. Furthermore, two Mycobacteriaceae isolates related to the putative mPE degraders, Mycobacterium sp. mPE3 and Nocardia sp. mPE12, were retrieved, and their ability to mineralize [13]C-mPE into [13]CO2 was confirmed. Pangenomic analysis reveals that the genes related to the proposed mPE biodegradation pathway are shared by members of Mycobacteriaceae. While both Mycobacterium and Nocardia are known for their pathogenicity, these populations on the plastisphere in this study were likely nonpathogenic as they lacked virulence factors. 
The current study provided direct evidence for MP mineralization by indigenous biodegraders and predicted their biodegradation pathway, which may be harnessed to improve bioremediation of MPs in urban rivers.}, } @article {pmid36284702, year = {2022}, author = {Rodrigues Blanco, I and José Luduverio Pizauro, L and Victor Dos Anjos Almeida, J and Miguel Nóbrega Mendonça, C and de Mello Varani, A and Pinheiro de Souza Oliveira, R}, title = {Pan-genomic and comparative analysis of Pediococcus pentosaceus focused on the in silico assessment of pediocin-like bacteriocins.}, journal = {Computational and structural biotechnology journal}, volume = {20}, number = {}, pages = {5595--5606}, pmid = {36284702}, issn = {2001-0370}, abstract = {Bacteriocins are antimicrobial peptides produced by different species of bacteria, especially the Gram-positive lactic acid bacteria (LAB). Pediococcus pentosaceus is widely applied in the industry and stands out as Bacteriocin-Like Inhibitory Substances (BLIS) producer known to inhibit pathogens commonly considered a concern in the food industries. This study aimed to perform in silico comparisons of P. pentosaceus genomes available in the public GenBank database focusing on their pediocin-like bacteriocins repertoire. The pan-genome analysis evidenced a temporal signal in the pattern of gene gain and loss, supporting the hypothesis that the complete genetic repertoire of this group of bacteria is still uncovered. Thirteen bacteriocin genes from Class II and III were predicted in the accessory genome. Four pediocin-like bacteriocins (54% of the detected bacteriocin repertoire) and their accompanying immunity genes are highlighted; penocin A, coagulin A, pediocin PA-1, and plantaricin 423. Additionally, in silico, modeling of the pediocin-like bacteriocins revealed different configurations of the helix motif compared to other physically determined pediocin-like structures. 
Comparative and phylogenomic analyses support the hypothesis that a dynamic mechanism of bacteriocin acquisition and purging is not dependent on the bacterial isolation source origin. Synteny analysis revealed that while coagulin A, pediocin PA-1, and Plantaricin 423 loci are associated with insertion sequences mainly from the IS30 family and are likely of plasmid origin, penocin A lies in a conserved chromosomal locus. The results presented here provide insights into the unique pediocin-like bacteriocin peptide fold, genomic diversity, and the evolution of the bacteriocin genetic repertoire of P. pentosaceus, shedding new insights into the role of these biomolecules for application in inhibiting bacterial pathogens, and suggesting that prospecting and sequencing new strains is still an alternative to mining for new probiotic compounds.}, } @article {pmid36282844, year = {2022}, author = {Chia, CT and Bender, AT and Lillis, L and Sullivan, BP and Martin, CD and Burke, W and Landis, C and Boyle, DS and Posner, JD}, title = {Rapid detection of hepatitis C virus using recombinase polymerase amplification.}, journal = {PloS one}, volume = {17}, number = {10}, pages = {e0276582}, pmid = {36282844}, issn = {1932-6203}, mesh = {Humans ; Recombinases/genetics ; Hepacivirus/genetics ; Antiviral Agents ; *Hepatitis C, Chronic/diagnosis ; *Hepatitis C/diagnosis ; Nucleic Acid Amplification Techniques ; Sensitivity and Specificity ; RNA ; RNA, Viral/genetics ; }, abstract = {Over 71 million people are infected with hepatitis C virus (HCV) worldwide, and approximately 400,000 global deaths result from complications of untreated chronic HCV. Pan-genomic direct-acting antivirals (DAAs) have recently become widely available and feature high cure rates in less than 12 weeks of treatment. The rollout of DAAs is reliant on diagnostic tests for HCV RNA to identify eligible patients with viremic HCV infections. 
Current PCR-based HCV RNA assays are restricted to well-resourced central laboratories, and there remains a prevailing clinical need for expanded access to decentralized HCV RNA testing to provide rapid chronic HCV diagnosis and linkage to DAAs in outpatient clinics. This paper reports a rapid, highly accurate, and minimally instrumented assay for HCV RNA detection using reverse transcription recombinase polymerase amplification (RT-RPA). The assay detects all HCV genotypes with a limit of detection of 25 copies per reaction for genotype 1, the most prevalent in the United States and worldwide. The clinical sensitivity and specificity of the RT-RPA assay were both 100% when evaluated using 78 diverse clinical serum specimens. The accuracy, short runtime, and low heating demands of RT-RPA may enable implementation in a point-of-care HCV test to expand global access to effective treatment via rapid chronic HCV diagnosis.}, } @article {pmid36280878, year = {2022}, author = {Gourlie, R and McDonald, M and Hafez, M and Ortega-Polo, R and Low, KE and Abbott, DW and Strelkov, SE and Daayf, F and Aboukhaddour, R}, title = {The pangenome of the wheat pathogen {Pyrenophora} tritici-repentis reveals novel transposons associated with necrotrophic effectors {ToxA} and {ToxB}.}, journal = {BMC biology}, volume = {20}, number = {1}, pages = {239}, pmid = {36280878}, issn = {1741-7007}, mesh = {Plant Diseases/microbiology ; Phylogeny ; *Mycotoxins/genetics ; *Ascomycota/genetics ; }, abstract = {BACKGROUND: In fungal plant pathogens, genome rearrangements followed by selection pressure for adaptive traits have facilitated the co-evolutionary arms race between hosts and their pathogens. Pyrenophora tritici-repentis (Ptr) has emerged recently as a foliar pathogen of wheat worldwide and its populations consist of isolates that vary in their ability to produce combinations of different necrotrophic effectors. These effectors play vital roles in disease development. 
Here, we sequenced the genomes of a global collection (40 isolates) of Ptr to gain insights into its gene content and genome rearrangements.

RESULTS: A comparative genome analysis revealed an open pangenome, with an abundance of accessory genes (~ 57%) reflecting Ptr's adaptability. A clear distinction between pathogenic and non-pathogenic genomes was observed in size, gene content, and phylogenetic relatedness. Chromosomal rearrangements and structural organization, specifically around effector coding genes, were detailed using long-read assemblies (PacBio RS II) generated in this work in addition to previously assembled genomes. We also discovered the involvement of large mobile elements associated with Ptr's effectors: ToxA, the gene encoding for the necrosis effector, was found as a single copy within a 143-kb 'Starship' transposon (dubbed 'Horizon') with a clearly defined target site and target site duplications. 'Horizon' was located on different chromosomes in different isolates, indicating mobility, and the previously described ToxhAT transposon (responsible for horizontal transfer of ToxA) was nested within this newly identified Starship. Additionally, ToxB, the gene encoding the chlorosis effector, was clustered as three copies on a 294-kb element, which is likely a different putative 'Starship' (dubbed 'Icarus') in a ToxB-producing isolate. ToxB and its putative transposon were missing from the ToxB non-coding reference isolate, but the homolog toxb and 'Icarus' were both present in a different non-coding isolate. This suggests that ToxB may have been mobile at some point during the evolution of the Ptr genome which is contradictory to the current assumption of ToxB vertical inheritance. Finally, the genome architecture of Ptr was defined as 'one-compartment' based on calculated gene distances and evolutionary rates.

CONCLUSIONS: These findings together reflect on the highly plastic nature of the Ptr genome which has likely helped to drive its worldwide adaptation and has illuminated the involvement of giant transposons in facilitating the evolution of virulence in Ptr.}, } @article {pmid36278460, year = {2022}, author = {Suryaletha, K and Savithri, AV and Nayar, SA and Asokan, S and Rajeswary, D and Thomas, S}, title = {Demystifying Bacteriocins of Human Microbiota by Genome Guided Prospects: An Impetus to Rekindle the Antimicrobial Research.}, journal = {Current protein \& peptide science}, volume = {23}, number = {12}, pages = {811--822}, pmid = {36278460}, issn = {1875-5550}, mesh = {Humans ; *Bacteriocins/genetics/pharmacology ; Anti-Bacterial Agents/pharmacology ; *Microbiota ; Bacteria/genetics ; }, abstract = {The human microbiome is a reservoir of potential bacteriocins that can counteract multidrug resistant bacterial pathogens. Unlike antibiotics, bacteriocins selectively inhibit a spectrum of competent bacteria and are said to safeguard gut commensals, reducing the chance of dysbiosis. Bacteriocinogenic probiotics or bacteriocins of human origin will be more pertinent in human physiological conditions for therapeutic applications to act against invading pathogens. Recent advancement in the omics approach enables the mining of diverse and novel bacteriocins by identifying biosynthetic gene clusters from the human microbial genome, pangenome or shotgun metagenome, which is a breakthrough in the discovery line of novel bacteriocins. This review summarizes the most recent trends and therapeutic potential of bacteriocins of human microbial origin, the advancement in the in silico algorithms and databases in the discovery of novel bacteriocin, and how to bridge the gap between the discovery of bacteriocin genes from big datasets and their in vitro production. 
Besides, the later part of the review discussed the various impediments in their clinical applications and possible solution to bring them into the frontline therapeutics to control infections, thereby meeting the challenges of global antimicrobial resistance.}, } @article {pmid36265748, year = {2023}, author = {González-Torres, B and González-Gómez, JP and Ramírez, K and Castro-Del Campo, N and González-López, I and Garrido-Palazuelos, LI and Chaidez, C and Medrano-Félix, JA}, title = {Population structure of the {Salmonella} enterica serotype {Oranienburg} reveals similar virulence, regardless of isolation years and sources.}, journal = {Gene}, volume = {851}, pages = {146966}, doi = {10.1016/j.gene.2022.146966}, pmid = {36265748}, issn = {1879-0038}, mesh = {Serogroup ; Virulence/genetics ; *Salmonella enterica/genetics ; Salmonella ; Anti-Bacterial Agents ; }, abstract = {Salmonella enterica serotype Oranienburg is a multi-host, ubiquitous, and prevalent Non-typhoidal Salmonella (NTS) in subtropical rivers, particularly in sediments; little studied so far possible the adaptation and establishment of this microorganism based on its genetic content. This study was focused on the first five genomes of S. Oranienburg in sediments through whole-genome sequencing (WGS) and 61 river water genomes isolated in previous studies. Results showed an open pangenome with 5,594 gene clusters (GCs), and the division of their categories showed; 3,303 core genes, 741 persistent genes, 1,282 accessory genes, and 268 unique genes. Additionally, it showed three main subclades within the same serotype and showed a conserved genetic content, suggesting the display of different adaptation strategies to its establishment. Nine genes for antimicrobial resistance were detected: aac (6') - Iy, H-NS, golS, marA, mdsABC, mdtK, and sdiA, and a mutation in the parC gene p. T57S generating a resistance. 
In addition, virulence genes and pathogenicity islands (SPI's) were analyzed, finding 92 genes and an identity above 80 % in the SPI's 1 to 5, and the centisomes 54 and 63. The environmental strains of S. Oranienburg do not represent a concern as multidrug resistance (MDR) bacterium; however, virulence genes remain a potential health risk. This study contributes to understanding its adaptation to aquatic environments in Mexico.}, } @article {pmid36263788, year = {2022}, author = {Dyrhage, K and Garcia-Montaner, A and Tamarit, D and Seeger, C and Näslund, K and Olofsson, TC and Vasquez, A and Webster, MT and Andersson, SGE}, title = {Genome Evolution of a Symbiont Population for Pathogen Defense in Honeybees.}, journal = {Genome biology and evolution}, volume = {14}, number = {11}, pmid = {36263788}, issn = {1759-6653}, mesh = {Bees/genetics ; Animals ; *Genome, Bacterial ; *Gastrointestinal Microbiome ; Bacteria ; Evolution, Molecular ; }, abstract = {The honeybee gut microbiome is thought to be important for bee health, but the role of the individual members is poorly understood. Here, we present closed genomes and associated mobilomes of 102 Apilactobacillus kunkeei isolates obtained from the honey crop (foregut) of honeybees sampled from beehives in Helsingborg in the south of Sweden and from the islands Gotland and Åland in the Baltic Sea. Each beehive contained a unique composition of isolates and repeated sampling of similar isolates from two beehives in Helsingborg suggests that the bacterial community is stably maintained across bee generations during the summer months. The sampled bacterial population contained an open pan-genome structure with a high genomic density of transposons. 
A subset of strains affiliated with phylogroup A inhibited growth of the bee pathogen Melissococcus plutonius, all of which contained a 19.5 kb plasmid for the synthesis of the antimicrobial compound kunkecin A, while a subset of phylogroups B and C strains contained a 32.9 kb plasmid for the synthesis of a putative polyketide antibiotic. This study suggests that the mobile gene pool of A. kunkeei plays a key role in pathogen defense in honeybees, providing new insights into the evolutionary dynamics of defensive symbiont populations.}, } @article {pmid36261518, year = {2022}, author = {Jarvis, ED and Formenti, G and Rhie, A and Guarracino, A and Yang, C and Wood, J and Tracey, A and Thibaud-Nissen, F and Vollger, MR and Porubsky, D and Cheng, H and Asri, M and Logsdon, GA and Carnevali, P and Chaisson, MJP and Chin, CS and Cody, S and Collins, J and Ebert, P and Escalona, M and Fedrigo, O and Fulton, RS and Fulton, LL and Garg, S and Gerton, JL and Ghurye, J and Granat, A and Green, RE and Harvey, W and Hasenfeld, P and Hastie, A and Haukness, M and Jaeger, EB and Jain, M and Kirsche, M and Kolmogorov, M and Korbel, JO and Koren, S and Korlach, J and Lee, J and Li, D and Lindsay, T and Lucas, J and Luo, F and Marschall, T and Mitchell, MW and McDaniel, J and Nie, F and Olsen, HE and Olson, ND and Pesout, T and Potapova, T and Puiu, D and Regier, A and Ruan, J and Salzberg, SL and Sanders, AD and Schatz, MC and Schmitt, A and Schneider, VA and Selvaraj, S and Shafin, K and Shumate, A and Stitziel, NO and Stober, C and Torrance, J and Wagner, J and Wang, J and Wenger, A and Xiao, C and Zimin, AV and Zhang, G and Wang, T and Li, H and Garrison, E and Haussler, D and Hall, I and Zook, JM and Eichler, EE and Phillippy, AM and Paten, B and Howe, K and Miga, KH}, title = {Semi-automated assembly of high-quality diploid human reference genomes.}, journal = {Nature}, volume = {611}, number = {7936}, pages = {519--531}, pmid = {36261518}, issn = {1476-4687}, support = 
{R01 HG006677/HG/NHGRI NIH HHS/United States ; U01 HG010961/HG/NHGRI NIH HHS/United States ; R35 GM130151/GM/NIGMS NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; R01 HG010040/HG/NHGRI NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; }, mesh = {Humans ; *Chromosome Mapping/standards ; *Diploidy ; *Genome, Human/genetics ; Haplotypes/genetics ; High-Throughput Nucleotide Sequencing/methods/standards ; Sequence Analysis, DNA/methods/standards ; Reference Standards ; *Genomics/methods/standards ; Chromosomes, Human/genetics ; Genetic Variation/genetics ; }, abstract = {The current human reference genome, GRCh38, represents over 20 years of effort to generate a high-quality assembly, which has benefitted society[1,2]. However, it still has many gaps and errors, and does not represent a biological genome as it is a blend of multiple individuals[3,4]. Recently, a high-quality telomere-to-telomere reference, CHM13, was generated with the latest long-read technologies, but it was derived from a hydatidiform mole cell line with a nearly homozygous genome[5]. To address these limitations, the Human Pangenome Reference Consortium formed with the goal of creating high-quality, cost-effective, diploid genome assemblies for a pangenome reference that represents human genetic diversity[6]. Here, in our first scientific report, we determined which combination of current genome sequencing and assembly approaches yield the most complete and accurate diploid genome assembly with minimal manual curation. Approaches that used highly accurate long reads and parent-child data with graph-based haplotype phasing during assembly outperformed those that did not. 
Developing a combination of the top-performing methods, we generated our first high-quality diploid reference assembly, containing only approximately four gaps per chromosome on average, with most chromosomes within ±1% of the length of CHM13. Nearly 48% of protein-coding genes have non-synonymous amino acid changes between haplotypes, and centromeric regions showed the highest diversity. Our findings serve as a foundation for assembling near-complete diploid human genomes at scale for a pangenome reference to capture global genetic variation from single nucleotides to structural rearrangements.}, } @article {pmid36258067, year = {2022}, author = {Abram, KZ and Jun, SR and Udaondo, Z}, title = {Pseudomonas aeruginosa Pangenome: Core and Accessory Genes of a Highly Resourceful Opportunistic Pathogen.}, journal = {Advances in experimental medicine and biology}, volume = {1386}, pages = {3--28}, pmid = {36258067}, issn = {0065-2598}, mesh = {*Pseudomonas aeruginosa/genetics ; *Genome, Bacterial ; Anti-Bacterial Agents ; Amino Acids ; Carbohydrates ; Phylogeny ; }, abstract = {In this chapter, we leverage a novel approach to assess the seamless population structure of Pseudomonas aeruginosa, using the full repertoire of genomes sequenced to date (GenBank, April 6, 2020). In order to assess the set of core functions that represents the species as well as the differences in these core functions among the phylogroups observed in the population structure analysis, we performed pangenome analyses at the species level and at the phylogroup level. The existence of the phylogroups described in the population structure analyses was supported by their different profiles of antibiotic-resistant determinants. Finally, we utilized a presence/absence matrix of protein families from the entire species to evaluate if P. aeruginosa phylogroups can be differentiated according to their accessory genomic content. Our analysis shows that the core genome of P. 
aeruginosa is approximately 62% of the average gene content for the species, and it is highly enriched with pathways related to the metabolism of carbohydrates and amino acids as well as cellular processes and cell maintenance. The analysis of the accessory genome of P. aeruginosa performed in this chapter confirmed not only the existence of the three phylogroups previously described in the population structure analysis, but also of 29 genetic substructures (subgroups) within the main phylogroups. Our work illustrates the utility of populations genomics pipelines to better understand highly complex bacterial species such as P. aeruginosa.}, } @article {pmid36255144, year = {2023}, author = {Wang, S and Qian, YQ and Zhao, RP and Chen, LL and Song, JM}, title = {Graph-based pan-genomes: increased opportunities in plant genomics.}, journal = {Journal of experimental botany}, volume = {74}, number = {1}, pages = {24--39}, doi = {10.1093/jxb/erac412}, pmid = {36255144}, issn = {1460-2431}, mesh = {*Genomics ; *Genome, Plant/genetics ; Sequence Analysis, DNA ; Polymorphism, Single Nucleotide ; }, abstract = {Due to the development of sequencing technology and the great reduction in sequencing costs, an increasing number of plant genomes have been assembled, and numerous genomes have revealed large amounts of variations. However, a single reference genome does not allow the exploration of species diversity, and therefore the concept of pan-genome was developed. A pan-genome is a collection of all sequences available for a species, including a large number of consensus sequences, large structural variations, and small variations including single nucleotide polymorphisms and insertions/deletions. A simple linear pan-genome does not allow these structural variations to be intuitively characterized, so graph-based pan-genomes have been developed. 
These pan-genomes store sequence and structural variation information in the form of nodes and paths to store and display species variation information in a more intuitive manner. The key role of graph-based pan-genomes is to expand the coordinate system of the linear reference genome to accommodate more regions of genetic diversity. Here, we review the origin and development of graph-based pan-genomes, explore their application in plant research, and further highlight the application of graph-based pan-genomes for future plant breeding.}, } @article {pmid36250060, year = {2022}, author = {Monshizadeh, M and Zomorodi, S and Mortensen, K and Ye, Y}, title = {Revealing bacteria-phage interactions in human microbiome through the {CRISPR-Cas} immune systems.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, pages = {933516}, pmid = {36250060}, issn = {2235-2988}, support = {R01 AI143254/AI/NIAID NIH HHS/United States ; }, mesh = {Bacteria/genetics ; *Bacteriophages/genetics ; CRISPR-Cas Systems ; Humans ; Immune System ; *Microbiota/genetics ; }, abstract = {The human gut microbiome is composed of a diverse consortium of microorganisms. Relatively little is known about the diversity of the bacteriophage population and their interactions with microbial organisms in the human microbiome. Due to the persistent rivalry between microbial organisms (hosts) and phages (invaders), genetic traces of phages are found in the hosts' CRISPR-Cas adaptive immune system. Mobile genetic elements (MGEs) found in bacteria include genetic material from phage and plasmids, often resultant from invasion events. We developed a computational pipeline (BacMGEnet), which can be used for inference and exploratory analysis of putative interactions between microbial organisms and MGEs (phages and plasmids) and their interaction network. 
Given a collection of genomes as the input, BacMGEnet utilizes computational tools we have previously developed to characterize CRISPR-Cas systems in the genomes, which are then used to identify putative invaders from publicly available collections of phage/prophage sequences. In addition, BacMGEnet uses a greedy algorithm to summarize identified putative interactions to produce a bacteria-MGE network in a standard network format. Inferred networks can be utilized to assist further examination of the putative interactions and for discovery of interaction patterns. Here we apply the BacMGEnet pipeline to a few collections of genomic/metagenomic datasets to demonstrate its utilities. BacMGEnet revealed a complex interaction network of the Phocaeicola vulgatus pangenome with its phage invaders, and the modularity analysis of the resulted network suggested differential activities of the different P. vulgatus' CRISPR-Cas systems (Type I-C and Type II-C) against some phages. Analysis of the phage-bacteria interaction network of human gut microbiome revealed a mixture of phages with a broad host range (resulting in large modules with many bacteria and phages), and phages with narrow host range. We also showed that BacMGEnet can be used to infer phages that invade bacteria and their interactions in wound microbiome. We anticipate that BacMGEnet will become an important tool for studying the interactions between bacteria and their invaders for microbiome research.}, } @article {pmid36238595, year = {2022}, author = {Palevich, N and Palevich, FP and Gardner, A and Brightwell, G and Mills, J}, title = {Genome collection of {Shewanella} spp. isolated from spoiled lamb.}, journal = {Frontiers in microbiology}, volume = {13}, pages = {976152}, pmid = {36238595}, issn = {1664-302X}, abstract = {The diversity of the genus Shewanella and their roles across a variety of ecological niches is largely unknown highlighting the phylogenetic diversity of these bacteria. 
From a food safety perspective, Shewanella species have been recognized as causative spoilage agents of vacuum-packed meat products. However, the genetic basis and metabolic pathways for the spoilage mechanism are yet to be explored due to the unavailability of relevant Shewanella strains and genomic resources. In this study, whole-genome sequencing of 32 Shewanella strains isolated from vacuum-packaged refrigerated spoiled lamb was performed to examine their roles in meat spoilage. Phylogenomic reconstruction revealed their genomic diversity with 28 Shewanella spp. strains belonging to the same putative novel species, two Shewanella glacialipiscicola strains (SM77 and SM91), Shewanella xiamenensis NZRM825, and Shewanella putrefaciens DSM 50426 (ATCC 8072) isolated from butter. Genome-wide clustering of orthologous gene families revealed functional groupings within the major Shewanella cluster but also considerable plasticity across the different species. Pan-genome analysis revealed conserved occurrence of spoilage genes associated with sulfur and putrescine metabolism, while the complete set of trimethylamine metabolism genes was observed in only Shewanella sp. SM74, S. glacialipiscicola SM77 and SM91 strains. Through comparative genomics, some variations were also identified pertaining to genes associated with adaptation to environmental cues such as temperature, osmotic, salt, oxidative, antimicrobial peptide, and drug resistance stresses. 
Here we provide a reference collection of draft Shewanella genomes for subsequent species descriptions and future investigations into the molecular spoilage mechanisms for further applications in the meat industry.}, } @article {pmid36226968, year = {2022}, author = {Jana, B and Keppel, K and Fridman, CM and Bosis, E and Salomon, D}, title = {Multiple {T6SSs}, Mobile Auxiliary Modules, and Effectors Revealed in a Systematic Analysis of the {Vibrio} parahaemolyticus Pan-Genome.}, journal = {mSystems}, volume = {7}, number = {6}, pages = {e0072322}, pmid = {36226968}, issn = {2379-5077}, mesh = {Animals ; Humans ; *Type VI Secretion Systems/genetics ; *Vibrio parahaemolyticus/genetics ; Bacterial Proteins/genetics ; Bacteria/metabolism ; Anti-Bacterial Agents/metabolism ; }, abstract = {Type VI secretion systems (T6SSs) play a major role in interbacterial competition and in bacterial interactions with eukaryotic cells. The distribution of T6SSs and the effectors they secrete vary between strains of the same bacterial species. Therefore, a pan-genome investigation is required to better understand the T6SS potential of a bacterial species of interest. Here, we performed a comprehensive, systematic analysis of T6SS gene clusters and auxiliary modules found in the pan-genome of Vibrio parahaemolyticus, an emerging pathogen widespread in marine environments. We identified 4 different T6SS gene clusters within genomes of this species; two systems appear to be ancient and widespread, whereas the other 2 systems are rare and appear to have been more recently acquired via horizontal gene transfer. In addition, we identified diverse T6SS auxiliary modules containing putative effectors with either known or predicted toxin domains. Many auxiliary modules are possibly horizontally shared between V. parahaemolyticus genomes, since they are flanked by DNA mobility genes. 
We further investigated a DUF4225-containing protein encoded on an Hcp auxiliary module, and we showed that it is an antibacterial T6SS effector that exerts its toxicity in the bacterial periplasm, leading to cell lysis. Computational analyses of DUF4225 revealed a widespread toxin domain associated with various toxin delivery systems. Taken together, our findings reveal a diverse repertoire of T6SSs and auxiliary modules in the V. parahaemolyticus pan-genome, as well as novel T6SS effectors and toxin domains that can play a major role in the interactions of this species with other cells. IMPORTANCE Gram-negative bacteria employ toxin delivery systems to mediate their interactions with neighboring cells. Vibrio parahaemolyticus, an emerging pathogen of humans and marine animals, was shown to deploy antibacterial toxins into competing bacteria via the type VI secretion system (T6SS). Here, we analyzed 1,727 V. parahaemolyticus genomes and revealed the pan-genome T6SS repertoire of this species, including the T6SS gene clusters, horizontally shared auxiliary modules, and toxins. 
We also identified a role for a previously uncharacterized domain, DUF4225, as a widespread antibacterial toxin associated with diverse toxin delivery systems.}, } @article {pmid36223424, year = {2022}, author = {Wang, F and Guo, Y and Liu, Z and Wang, Q and Jiang, Y and Zhao, G}, title = {New insights into the novel sequences of the chicken pan-genome by liquid chip.}, journal = {Journal of animal science}, volume = {100}, number = {12}, pmid = {36223424}, issn = {1525-3163}, support = {32072708//National Natural Science Foundation of China/ ; 2022JQ-171//Natural Science Basic Research Program of Shaanxi Province/ ; }, mesh = {Animals ; *Chickens/genetics ; Genotype ; *Polymorphism, Single Nucleotide ; Genome ; Oligonucleotide Array Sequence Analysis/veterinary ; }, abstract = {Increasing evidence indicates that the missing sequences and genes in the chicken reference genome are involved in many crucial biological pathways, including metabolism and immunity. The low detection rate of novel sequences by resequencing hindered the acquisition of these sequences and the exploration of the relationship between new genes and economic traits. To improve the capture ratio of novel sequences, a 48K liquid chip including 25K from the reference sequence and 23K from the novel sequence was designed. The assay was tested on a panel of 218 animals from 5 chicken breeds. The average capture ratio of the reference sequence was 99.55%, and the average sequencing depth of the target sites was approximately 187X, indicating a good performance and successful application of liquid chips in farm animals. For the target region in the novel sequence, the average capture ratio was 33.15% and the average sequencing depth of target sites was approximately 60X, both of which were higher than that of resequencing. However, the different capture ratios and capture regions among varieties and individuals proved the difficulty of capturing these regions with complex structures. 
After genotyping, GWAS showed variations in novel sequences potentially relevant to immune-related traits. For example, a SNP close to the differentiation of lymphocyte-related gene IGHV3-23-like was associated with the H/L ratio. These results suggest that targeted capture sequencing is a preferred method to capture these sequences with complex structures and genes potentially associated with immune-related traits.}, } @article {pmid36223396, year = {2022}, author = {Wagner, DM and Birdsell, DN and McDonough, RF and Nottingham, R and Kocos, K and Celona, K and Özsürekci, Y and Öhrman, C and Karlsson, L and Myrtennäs, K and Sjödin, A and Johansson, A and Keim, PS and Forsman, M and Sahl, JW}, title = {Genomic characterization of {Francisella} tularensis and other diverse {Francisella} species from complex samples.}, journal = {PloS one}, volume = {17}, number = {10}, pages = {e0273273}, pmid = {36223396}, issn = {1932-6203}, mesh = {Animals ; *Anti-Infective Agents ; DNA, Bacterial/genetics ; *Francisella tularensis/genetics ; Genomics ; Humans ; Phylogeny ; RNA ; *Tularemia/microbiology ; }, abstract = {Francisella tularensis, the bacterium that causes the zoonosis tularemia, and its genetic near neighbor species, can be difficult or impossible to cultivate from complex samples. Thus, there is a lack of genomic information for these species that has, among other things, limited the development of robust detection assays for F. tularensis that are both specific and sensitive. The objective of this study was to develop and validate approaches to capture, enrich, sequence, and analyze Francisella DNA present in DNA extracts generated from complex samples. RNA capture probes were designed based upon the known pan genome of F. tularensis and other diverse species in the family Francisellaceae. 
Probes that targeted genomic regions also present in non-Francisellaceae species were excluded, and probes specific to particular Francisella species or phylogenetic clades were identified. The capture-enrichment system was then applied to diverse, complex DNA extracts containing low-level Francisella DNA, including human clinical tularemia samples, environmental samples (i.e., animal tissue and air filters), and whole ticks/tick cell lines, which was followed by sequencing of the enriched samples. Analysis of the resulting data facilitated rigorous and unambiguous confirmation of the detection of F. tularensis or other Francisella species in complex samples, identification of mixtures of different Francisella species in the same sample, analysis of gene content (e.g., known virulence and antimicrobial resistance loci), and high-resolution whole genome-based genotyping. The benefits of this capture-enrichment system include: even very low target DNA can be amplified; it is culture-independent, reducing exposure for research and/or clinical personnel and allowing genomic information to be obtained from samples that do not yield isolates; and the resulting comprehensive data not only provide robust means to confirm the presence of a target species in a sample, but also can provide data useful for source attribution, which is important from a genomic epidemiology perspective.}, } @article {pmid36219094, year = {2022}, author = {Bista, PK and Pillai, D and Roy, C and Scaria, J and Narayanan, SK}, title = {Comparative Genomic Analysis of {Fusobacterium} necrophorum Provides Insights into Conserved Virulence Genes.}, journal = {Microbiology spectrum}, volume = {10}, number = {6}, pages = {e0029722}, pmid = {36219094}, issn = {2165-0497}, mesh = {Animals ; Cattle ; Humans ; *Fusobacterium necrophorum/genetics ; Virulence/genetics ; Base Composition ; Phylogeny ; Sequence Analysis, DNA ; RNA, Ribosomal, 16S/genetics ; *Genomics ; }, abstract = {Fusobacterium necrophorum is a 
Gram-negative, filamentous anaerobe prevalent in the mucosal flora of animals and humans. It causes necrotic infections in cattle, resulting in a substantial economic impact on the cattle industry. Although infection severity and management differ within F. necrophorum species, little is known about F. necrophorum speciation and the genetic virulence determinants between strains. To characterize the clinical isolates, we performed whole-genome sequencing of four bovine isolates (8L1, 212, B17, and SM1216) and one human isolate (MK12). To determine the phylogenetic relationship and evolution pattern and investigate the presence of antimicrobial resistance genes (ARGs) and potential virulence genes of F. necrophorum, we also performed comparative genomics with publicly available Fusobacterium genomes. Using up-to-date bacterial core gene (UBCG) set analysis, we uncovered distinct Fusobacterium species and F. necrophorum subspecies clades. Pangenome analyses revealed a high level of diversity among Fusobacterium strains down to species levels. The output also identified 14 and 26 genes specific to F. necrophorum subsp. necrophorum and F. necrophorum subsp. funduliforme, respectively, which could be essential for bacterial survival under different environmental conditions. ClonalFrameML-based recombination analysis suggested that extensive recombination among accessory genes led to species divergence. Furthermore, the only strain of F. necrophorum with ARGs was F. necrophorum subsp. funduliforme B35, with acquired macrolide and tetracycline resistance genes. Our custom search revealed common virulence genes, including toxins, adhesion proteins, outer membrane proteins, cell envelope, type IV secretion system, ABC (ATP-binding cassette) transporters, and transporter proteins. A focused study on these genes could help identify major virulence genes and inform effective vaccination strategies against fusobacterial infections. 
IMPORTANCE Fusobacterium necrophorum is an anaerobic bacterium that causes liver abscesses in cattle with an annual incidence rate of 10% to 20%, resulting in a substantial economic impact on the cattle industry. The lack of definite biochemical tests makes it difficult to distinguish F. necrophorum subspecies phenotypically, where genomic characterization plays a significant role. However, due to the lack of a good reference genome for comparison, F. necrophorum subspecies-level identification represents a significant challenge. To overcome this challenge, we used comparative genomics to validate clinical test strains for subspecies-level identification. The findings of our study help predict specific clades of previously uncharacterized strains of F. necrophorum. Our study identifies both general and subspecies-specific virulence genes through a custom search-based analysis. The virulence genes identified in this study can be the focus of future studies aimed at evaluating their potential as vaccine targets to prevent fusobacterial infections in cattle.}, } @article {pmid36214662, year = {2022}, author = {Moolhuijzen, PM and See, PT and Shi, G and Powell, HR and Cockram, J and Jørgensen, LN and Benslimane, H and Strelkov, SE and Turner, J and Liu, Z and Moffat, CS}, title = {A global pangenome for the wheat fungal pathogen Pyrenophora tritici-repentis and prediction of effector protein structural homology.}, journal = {Microbial genomics}, volume = {8}, number = {10}, pages = {}, pmid = {36214662}, issn = {2057-5858}, mesh = {Ascomycota ; Host-Pathogen Interactions/genetics ; *Mycotoxins/genetics/metabolism ; Plant Diseases/microbiology ; Structural Homology, Protein ; *Triticum/genetics/metabolism/microbiology ; }, abstract = {The adaptive potential of plant fungal pathogens is largely governed by the gene content of a species, consisting of core and accessory genes across the pathogen isolate repertoire. 
To approximate the complete gene repertoire of a globally significant crop fungal pathogen, a pan genomic analysis was undertaken for Pyrenophora tritici-repentis (Ptr), the causal agent of tan (or yellow) spot disease in wheat. In this study, 15 new Ptr genomes were sequenced, assembled and annotated, including isolates from three races not previously sequenced. Together with 11 previously published Ptr genomes, a pangenome for 26 Ptr isolates from Australia, Europe, North Africa and America, representing nearly all known races, revealed a conserved core-gene content of 57 % and presents a new Ptr resource for searching natural homologues (orthologues not acquired by horizontal transfer from another species) using remote protein structural homology. Here, we identify for the first time a non-synonymous mutation in the Ptr necrotrophic effector gene ToxB, multiple copies of the inactive toxb within an isolate, a distant natural Pyrenophora homologue of a known Parastagonospora nodorum necrotrophic effector (SnTox3), and clear genomic break points for the ToxA effector horizontal transfer region. This comprehensive genomic analysis of Ptr races includes nine isolates sequenced via long read technologies. Accordingly, these resources provide a more complete representation of the species, and serve as a resource to monitor variations potentially involved in pathogenicity.}, } @article {pmid36212836, year = {2022}, author = {Kim, E and Yang, SM and Kim, IS and Lee, SY and Kim, HY}, title = {Identification of Leuconostoc species based on novel marker genes identified using real-time PCR via computational pangenome analysis.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {1014872}, pmid = {36212836}, issn = {1664-302X}, abstract = {Leuconostoc species are important microorganisms in food fermentation but also cause food spoilage. 
Although these species are commercially important, their taxonomy is still based on inaccurate identification methods. Here, we used computational pangenome analysis to develop a real-time PCR-based method for identifying and differentiating the 12 major Leuconostoc species found in food. Analysis of pan and core-genome phylogenies showed clustering of strains into 12 distinct groups according to the species. Pangenome analysis of 130 Leuconostoc genomes from these 12 species enabled the identification of each species-specific gene. In silico testing of the species-specific genes against 143 publicly available Leuconostoc and 100 other lactic acid bacterial genomes showed that all the assays had 100% inclusivity/exclusivity. We also verified the specificity for each primer pair targeting each specific gene using 23 target and 124 non-target strains and found high specificity (100%). The sensitivity of the real-time PCR method was $10^{2}$ colony forming units (CFUs)/ml in pure culture and spiked food samples. All standard curves showed good linear correlations, with an $R^{2}$ value of ≥0.996, suggesting that screened targets have good specificity and strong anti-interference ability from food sample matrices and non-target strains. 
The real-time PCR method can be potentially used to determine the taxonomic status and identify the Leuconostoc species in foods.}, } @article {pmid36209154, year = {2022}, author = {Marcet-Houben, M and Alvarado, M and Ksiezopolska, E and Saus, E and de Groot, PWJ and Gabaldón, T}, title = {Chromosome-level assemblies from diverse clades reveal limited structural and gene content variation in the genome of Candida glabrata.}, journal = {BMC biology}, volume = {20}, number = {1}, pages = {226}, pmid = {36209154}, issn = {1741-7007}, mesh = {*Candida glabrata/chemistry/genetics ; Chromosomes ; *Fungal Proteins/genetics ; Genome, Fungal ; Plastics ; }, abstract = {BACKGROUND: Candida glabrata is an opportunistic yeast pathogen thought to have a large genetic and phenotypic diversity and a highly plastic genome. However, the lack of chromosome-level genome assemblies representing this diversity limits our ability to accurately establish how chromosomal structure and gene content vary across strains.

RESULTS: Here, we expanded publicly available assemblies by using long-read sequencing technologies in twelve diverse strains, obtaining a final set of twenty-one chromosome-level genomes spanning the known C. glabrata diversity. Using comparative approaches, we inferred variation in chromosome structure and determined the pan-genome, including an analysis of the adhesin gene repertoire. Our analysis uncovered four new adhesin orthogroups and inferred a rich ancestral adhesion repertoire, which was subsequently shaped through a still ongoing process of gene loss, gene duplication, and gene conversion.

CONCLUSIONS: C. glabrata has a largely stable pan-genome except for a highly variable subset of genes encoding cell wall-associated functions. Adhesin repertoire was established for each strain and showed variability among clades.}, } @article {pmid36209064, year = {2022}, author = {Harling-Lee, JD and Gorzynski, J and Yebra, G and Angus, T and Fitzgerald, JR and Freeman, TC}, title = {A graph-based approach for the visualisation and analysis of bacterial pangenomes.}, journal = {BMC bioinformatics}, volume = {23}, number = {1}, pages = {416}, pmid = {36209064}, issn = {1471-2105}, support = {ETM/421/CSO_/Chief Scientist Office/United Kingdom ; 201531/Z/16/Z/WT_/Wellcome Trust/United Kingdom ; BB/PO13740/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BB/P013732/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {Bacteria/genetics ; *Genome, Bacterial ; *High-Throughput Nucleotide Sequencing/methods ; Sequence Analysis, DNA/methods ; }, abstract = {BACKGROUND: The advent of low cost, high throughput DNA sequencing has led to the availability of thousands of complete genome sequences for a wide variety of bacterial species. Examining and interpreting genetic variation on this scale represents a significant challenge to existing methods of data analysis and visualisation.

RESULTS: Starting with the output of standard pangenome analysis tools, we describe the generation and analysis of interactive, 3D network graphs to explore the structure of bacterial populations, the distribution of genes across a population, and the syntenic order in which those genes occur, in the new open-source network analysis platform, Graphia. Both the analysis and the visualisation are scalable to datasets of thousands of genome sequences.

CONCLUSIONS: We anticipate that the approaches presented here will be of great utility to the microbial research community, allowing faster, more intuitive, and flexible interaction with pangenome datasets, thereby enhancing interpretation of these complex data.}, } @article {pmid36205822, year = {2022}, author = {Deb, S}, title = {Pan-genome evolution and its association with divergence of metabolic functions in Bifidobacterium genus.}, journal = {World journal of microbiology & biotechnology}, volume = {38}, number = {12}, pages = {231}, pmid = {36205822}, issn = {1573-0972}, mesh = {*Bifidobacterium/genetics ; Carbohydrates ; Evolution, Molecular ; *Genome, Bacterial/genetics ; Humans ; Phylogeny ; }, abstract = {Previous studies were mainly focused on genomic evolution and diversity of type species of Bifidobacterium genus due to their health-promoting effect on host. However, those studies were mainly based on species-level taxonomic resolution, adaptation, and characterization of carbohydrate metabolic features of the bifidobacterial species. Here, a comprehensive analysis of the type strain genome unveils the association of pan-genome evolution with the divergence of metabolic function of the Bifidobacterium genus. This study has also demonstrated that horizontal gene transfer, as well as genome expansion and reduction events, leads to the divergence of metabolic functions in Bifidobacterium genus. Furthermore, the genome-based search of probiotic traits among all the available bifidobacterial type strains gives hints on type species, that could confer health benefits to nutrient-deficient individuals. 
Altogether, the present study provides insight into the developments of genomic evolution, functional divergence, and potential probiotic type species of the Bifidobacterium genus.}, } @article {pmid36204049, year = {2022}, author = {Nisar, T and Tahir, MHN and Iqbal, S and Sajjad, M and Nadeem, MA and Qanmber, G and Baig, A and Khan, Z and Zhao, Z and Geng, Z and Ur Rehman, S}, title = {Genome-wide characterization and sequence polymorphism analyses of cysteine-rich poly comb-like protein in Glycine max.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {996265}, pmid = {36204049}, issn = {1664-462X}, abstract = {Cysteine-rich poly comb-like protein (CPP) is a member of cysteine-rich transcription factors that regulates plant growth and development. In the present work, we characterized twelve CPP transcription factors encoding genes in soybean (Glycine max). Phylogenetic analyses classified CPP genes into six clades. Sequence logos analyses between G. max and G. soja amino acid residues exhibited high conservation. The presence of growth and stress-related cis-acting elements in the upstream regions of GmCPPs highlight their role in plant development and tolerance against abiotic stress. Ka/Ks levels showed that GmCPPs experienced limited selection pressure with limited functional divergence arising from segmental or whole genome duplication events. By using the PAN-genome of soybean, a single nucleotide polymorphism was identified in GmCPP-6. To perform high throughput genotyping, a kompetitive allele-specific PCR (KASP) marker was developed. Association analyses indicated that GmCPP-6-T allele of GmCPP-6 (in exon region) was associated with higher thousand seed weight under both water regimes (well-water and water-limited). 
Taken together, these results provide vital information to further decipher the biological functions of CPP genes in soybean molecular breeding.}, } @article {pmid36201155, year = {2022}, author = {Torres-Miranda, A and Melis-Arcos, F and Garrido, D}, title = {Characterization and Identification of Probiotic Features in Lacticaseibacillus Paracasei Using a Comparative Genomic Analysis Approach.}, journal = {Probiotics and antimicrobial proteins}, volume = {14}, number = {6}, pages = {1211-1224}, pmid = {36201155}, issn = {1867-1314}, mesh = {Inulin/metabolism ; Phylogeny ; *Lacticaseibacillus paracasei/metabolism ; *Probiotics ; Genomics ; }, abstract = {Lacticaseibacillus paracasei species are widely used for their health-promoting properties in food and agricultural applications. These bacteria have been isolated from various habitats such as the oral cavity, cereals, vegetables, meats, and dairy products conferring them the ability to consume different carbohydrates. Two subspecies are recognized, Lacticaseibacillus paracasei subsp. paracasei and Lacticaseibacillus paracasei subsp. tolerans according to their acid production from carbohydrates. Some strains are currently used as probiotics. In this study, we performed a comparative genomic analysis of 181 genomes of the Lacticaseibacillus paracasei species to reveal genomic differences at the subspecies level and to reveal adaptive and probiotic features, and special emphasis is given to inulin consumption. No clear distinction at the subspecies level for L. paracasei was shown using a phylogenetic tree with orthologous genes from the core-genome set. In general, a good correlation was observed between genomic distance and isolation origin, suggesting that L. paracasei strains are adapted to their natural habitat, giving rise to genetic differences at the genomic level. 
A low frequency of undesirable characteristics such as plasmids, prophages, antibiotic resistance genes, absence of virulence factors, and frequent bacteriocin production supports these species being good candidates for use as probiotics. Lastly, we found that the inulin gene cluster in L. paracasei strains seems to differ slightly in the presence or absence of some genes but maintains a core defined by at least three fructose-PTS proteins, one hypothetical protein, and extracellular β-fructosidase. Finally, we conclude that further work has to be done for L. paracasei subspecies classification. Improving outgroup selection criteria is a key factor for their correct subspecies assignation.}, } @article {pmid36201053, year = {2022}, author = {Karthik, K and Anbazhagan, S and Chitra, MA and Sridhar, R}, title = {First report of the whole genome of Moraxella bovoculi genotype 1 from India and comparative genomics of Moraxella bovoculi to identify genotype-specific markers.}, journal = {Archives of microbiology}, volume = {204}, number = {11}, pages = {663}, pmid = {36201053}, issn = {1432-072X}, mesh = {Animals ; Cattle ; *Cattle Diseases/diagnosis ; Fimbriae Proteins ; Genomics ; Genotype ; *Keratoconjunctivitis ; *Keratoconjunctivitis, Infectious ; Moraxella/genetics ; *Moraxellaceae Infections/diagnosis/veterinary ; Vaccines, Synthetic ; Virulence Factors/genetics ; }, abstract = {Moraxella bovoculi has been isolated frequently from cattle with Infectious bovine keratoconjunctivitis (IBK). Two diverse genotypes of M. bovoculi, 1 and 2 were identified based on whole genome sequence analysis. It is essential to discriminate between the two genotypes to frame prevention and control measures. The whole genome of M. bovoculi TN7 was sequenced and compared to other M. bovoculi strains available in the NCBI database. M. 
bovoculi TN7 was found to be genotype 1, had an RTX toxin operon and pilA gene that are the known virulence factors in related Moraxella sp., but lacked antimicrobial resistance genes. M. bovoculi was found to have an open pangenome with 4051 (75.31%) accessory genes, and the addition of each new genome adds 18 genes to the pangenome. Comparison of pilin protein amino acid sequences revealed three new sequence types. Furthermore, the presence of linX, nagL, swrC and mdtA genes was found to be genotype 1 specific, whereas hyaD, garR, gbsA, yhdG, gabT, iclR, higB2, hmuU, hmuT and hemS were found only in genotype 2. Polymerase Chain Reaction (PCR) primers were designed and evaluated on strain TN7 plus seven additional strains accessible to us that had not been whole genome sequenced. This initial evaluation of the designed primers for the linX and hyaD genes produced the expected banding patterns on PCR gels for genotypes 1 and 2, respectively, among the 8 strains. The genotype-specific genes identified in this study can be used as markers for accurate diagnosis of genotype 1 isolates and this can aid in the development of autogenous or other molecular vaccines for treatment of infectious bovine keratoconjunctivitis (IBK) in resource-limited research settings.}, } @article {pmid36200898, year = {2022}, author = {Hildebrand, RE and Chandrasekar, SS and Riel, M and Touray, BJB and Aschenbroich, SA and Talaat, AM}, title = {Superinfection with SARS-CoV-2 Has Deleterious Effects on Mycobacterium bovis BCG Immunity and Promotes Dissemination of Mycobacterium tuberculosis.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0307522}, pmid = {36200898}, issn = {2165-0497}, mesh = {Mice ; Animals ; *Mycobacterium tuberculosis ; *Mycobacterium bovis ; Interleukin-17 ; SARS-CoV-2 ; BCG Vaccine ; *Superinfection ; *COVID-19 ; *Tuberculosis, Lymph Node ; Cytokines ; }, abstract = {An estimated one-third of the world's population is infected with 
Mycobacterium tuberculosis, with the majority being vaccinated with Mycobacterium bovis BCG. Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) remains a threat, and we must understand how SARS-CoV-2 can modulate both BCG immunity and tuberculosis pathogenesis. Interestingly, neither BCG vaccination nor tuberculosis infection resulted in differences in clinical outcomes associated with SARS-CoV-2 in transgenic mice. Surprisingly, earlier M. tuberculosis infection resulted in lower SARS-CoV-2 viral loads, mediated by the heightened immune microenvironment of the murine lungs, unlike vaccination with BCG, which had no impact. In contrast, M. tuberculosis-infected tissues had increased bacterial loads and decreased histiocytic inflammation in the lungs following SARS-CoV-2 superinfection. SARS-CoV-2 modulated BCG-induced type 17 responses while decreasing type 1 and increasing type 2 cytokines in M. tuberculosis-infected mice. These findings challenge initial findings of BCG's positive impact on SARS-CoV-2 infection and suggest potential ramifications for M. tuberculosis reactivation upon SARS-CoV-2 superinfection. IMPORTANCE Prior to SARS-CoV-2, M. tuberculosis was the leading infectious disease killer, with an estimated one-third of the world's population infected and 1.7 million deaths a year. Here, we show that SARS-CoV-2 superinfection caused increased bacterial dissemination in M. tuberculosis-infected mice along with immune and pathological changes. SARS-CoV-2 also impacted the immunity of BCG-vaccinated mice, resulting in decreased interleukin-17 (IL-17) levels, while offering no protective effect against SARS-CoV-2. These results demonstrate that SARS-CoV-2 may have a deleterious effect on the ongoing M. 
tuberculosis pandemic and potentially limit BCG's efficacy.}, } @article {pmid36200250, year = {2023}, author = {Srivastava, P and Jain, CK}, title = {Computer Aided Reverse Vaccinology: A Game-changer Approach for Vaccine Development.}, journal = {Combinatorial chemistry & high throughput screening}, volume = {26}, number = {10}, pages = {1813-1821}, doi = {10.2174/1386207325666220930124013}, pmid = {36200250}, issn = {1875-5402}, mesh = {*Artificial Intelligence ; Vaccinology/methods ; *Vaccines ; Computational Biology/methods ; Computers ; Vaccine Development ; }, abstract = {One of the most dynamic approaches in biotechnology is reverse vaccinology, which plays a huge role in today's developing vaccines. It has the capability of exploring and identifying the most potent vaccine candidate in a limited period of time. The first successful novel approach of reverse vaccinology was observed in Neisseria meningitidis serogroup B, which has revolutionised the whole field of computational biology. In this review, we have summarized the application of reverse vaccinology for different infectious diseases, discussed epitope prediction and various available bioinformatic tools, and explored the advantages, limitations and necessary elements of this approach. Some of the modifications in the reverse vaccinology approach, like pan-genome and comparative reverse vaccinology, are also outlined. Vaccines for illnesses like AIDS and hepatitis C have not yet been developed. Computer Aided Reverse vaccinology has the potential to be a game-changer in this area. The use of computational tools, pipelines and advanced soft-computing methods, such as artificial intelligence and deep learning, and exploitation of available omics data in integration have paved the way for speedy and effective vaccine designing. Is reverse vaccinology a viable option for developing vaccines against such infections, or is it a myth? 
Vaccine development gained momentum after the spread of various infections, resulting in numerous deaths; these vaccines are developed using the traditional technique, which includes inactivated microorganisms. As a result, reverse vaccinology may be a far superior technique for creating an effective vaccine.}, } @article {pmid36195962, year = {2022}, author = {Grytten, I and Dagestad Rand, K and Sandve, GK}, title = {KAGE: fast alignment-free graph-based genotyping of SNPs and short indels.}, journal = {Genome biology}, volume = {23}, number = {1}, pages = {209}, pmid = {36195962}, issn = {1474-760X}, mesh = {Algorithms ; Bayes Theorem ; Genome, Human ; Genotype ; Genotyping Techniques ; High-Throughput Nucleotide Sequencing ; Humans ; *INDEL Mutation ; *Polymorphism, Single Nucleotide ; Sequence Analysis, DNA ; }, abstract = {Genotyping is a core application of high-throughput sequencing. We present KAGE, a genotyper for SNPs and short indels that is inspired by recent developments within graph-based genome representations and alignment-free methods. KAGE uses a pan-genome representation of the population to efficiently and accurately predict genotypes. Two novel ideas improve both the speed and accuracy: a Bayesian model incorporates genotypes from thousands of individuals to improve prediction accuracy, and a computationally efficient method leverages correlation between variants. 
We show that the accuracy of KAGE is at par with the best existing alignment-free genotypers, while being an order of magnitude faster.}, } @article {pmid36190436, year = {2022}, author = {Xu, Z and Yuan, C}, title = {Molecular Epidemiology of Staphylococcus aureus in China Reveals the Key Gene Features Involved in Epidemic Transmission and Adaptive Evolution.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0156422}, pmid = {36190436}, issn = {2165-0497}, mesh = {Humans ; Animals ; Staphylococcus aureus ; Molecular Epidemiology/methods ; Phylogeny ; *Staphylococcal Infections/epidemiology ; *Anti-Infective Agents ; Anti-Bacterial Agents ; Microbial Sensitivity Tests ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Evolution, Molecular ; }, abstract = {Staphylococcus aureus is a Gram-positive pathogen that causes various infections in humans and domestic animals. In China, S. aureus is the most common Gram-positive pathogen that causes clinical infections. However, there are few comprehensive genome-based molecular epidemiology studies to investigate the genotypic background of the major S. aureus clones that are epidemic in China. Here, four S. aureus isolates that were recovered from hospital personnel were sequenced. In combination with whole-genome sequencing (WGS) data of 328 S. aureus strains as references, we performed a comprehensive molecular epidemiology study to reveal the molecular epidemic characterization of S. aureus that is epidemic in China. It was found that 332 S. aureus isolates were phylogenetically categorized into 4 major epidemic groups with different epidemiology phenotypes. Each group has exclusive features in virulence genotypic profiles, antimicrobial resistance genotypic profiles, core and pangenome features representing the differences involved in genetic features, evolutionary processes, and potential future evolutionary directions. Moreover, a comparative core genome analysis of 332 S. 
aureus isolates indicated several key genes that contributed to differences in molecular epidemic characterization and promoted the adaptive evolutionary process of each group. This study provides a comprehensive understanding of molecular epidemiological characteristics and adaptive evolutionary directions of major S. aureus clones that are epidemic in China. IMPORTANCE Staphylococcus aureus is an important Gram-positive pathogen that is epidemic worldwide and causes various infections in humans and domestic animals. However, there has been relatively little research on comprehensive molecular epidemiology in China. In this research, we reconstructed the phylogenetic relationship based on whole-genome data of strains almost all over China, screened for resistance and virulence genes, and took core and pan genome analysis to perform a comprehensive molecular epidemiology study of S. aureus that is epidemic in China. Our results highlight that there are 4 major epidemic groups with different epidemiology phenotypes after phylogenetic categorization with exclusive genetic features in virulence genotypic profiles, antimicrobial-resistance genotypic profiles, and core and pangenome features, and we found key gene features involved in epidemic transmission and adaptive evolution. Our findings are critical in describing molecular characteristic profiles of S. 
aureus infection, which could update existing preventive measures and take appropriate strategies.}, } @article {pmid36190419, year = {2022}, author = {Park, M and Kim, J and Horn, L and Haan, J and Strickland, A and Lappi, V and Boxrud, D and Hedberg, C and Ryu, S and Jeon, B}, title = {Sugar Modification of Wall Teichoic Acids Determines Serotype-Dependent Strong Biofilm Production in Listeria monocytogenes.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0276922}, pmid = {36190419}, issn = {2165-0497}, mesh = {Humans ; *Listeria monocytogenes/genetics ; Serogroup ; Teichoic Acids ; Phylogeny ; Sugars ; Rhamnose ; Biofilms ; Serotyping ; Food Microbiology ; }, abstract = {Biofilm production is responsible for persistent food contamination by Listeria monocytogenes, threatening food safety and public health. Human infection and food contamination with L. monocytogenes are caused primarily by serotypes 1/2a, 1/2b, and 4b. However, the association of biofilm production with phylogenic lineage and serotype has not yet been fully understood. In this study, we measured the levels of biofilm production in 98 clinical strains of L. monocytogenes at 37°C, 25°C, and 4°C. The phylogenetic clusters grouped by core genome multilocus sequence typing (cgMLST) exhibited association between biofilm production and phylogenetic lineage and serotype. Whereas clusters 1 and 3 consisting of serotype 4b strains exhibited weak biofilm production, clusters 2 (serotype 1/2b) and 4 (serotype 1/2a) were composed of strong biofilm formers. Particularly, cluster 2 (serotype 1/2b) strains exhibited the highest levels of biofilm production at 37°C, and the levels of biofilm production of cluster 4 (serotype 1/2a) strains were significantly elevated at all tested temperatures. Pan-genome analysis identified 22 genes unique to strong biofilm producers, most of which are related to the synthesis and modification of teichoic acids. 
Notably, a knockout mutation of the rml genes related to the modification of wall teichoic acids with l-rhamnose, which is specific to serogroup 1/2, significantly reduced the level of biofilm production by preventing biofilm maturation. Here, the results of our study show that biofilm production in L. monocytogenes is related to phylogeny and serotype and that the modification of wall teichoic acids with l-rhamnose is responsible for serotype-specific strong biofilm formation in L. monocytogenes. IMPORTANCE Biofilm formation on the surface of foods or food-processing facilities by L. monocytogenes is a serious food safety concern. Here, our data demonstrate that the level of biofilm production differs among serotypes 1/2a, 1/2b, and 4b depending on the temperature. Furthermore, sugar decoration of bacterial cell walls with l-rhamnose is responsible for strong biofilm production in serotypes 1/2a and 1/2b, commonly isolated from foods and listeriosis cases. The findings in this study improve our understanding of the association of biofilm production with phylogenetic lineage and serotype in L. monocytogenes.}, } @article {pmid36189364, year = {2022}, author = {Lv, Z and Chen, Y and Zhou, H and Chen, Z and Yao, Q and Ren, J and Liu, X and Liu, S and Deng, X and Pang, Y and Chen, W and Yang, H and Xu, P}, title = {Genomic characterization of two metagenome-assembled genomes of Tropheryma whipplei from China.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {947486}, pmid = {36189364}, issn = {2235-2988}, mesh = {Fluoroquinolones ; Genomics ; *Metagenome ; Phylogeny ; *Tropheryma/genetics ; Virulence Factors ; }, abstract = {Whipple's disease is a rare chronic systemic disease that affects almost any organ system of the body caused by the intracellular bacterium Tropheryma whipplei, which is found ubiquitously in the environment. Sequencing of the T. 
whipplei genome has revealed that it has a reduced genome (0.93 Mbp), a characteristic shared with other intracellular bacteria. Until our research started, 19 T. whipplei strains had been sequenced from cultures originated in France, Canada, and Germany. The genome of T. whipplei bacterium has not been studied in Asia yet. Here, two metagenome-assembled genomes (MAGs) of T. whipplei from China were reconstructed through metagenomic next-generation sequencing (mNGS) and genome binning. We also provided genomic insights into the geographical role and genomic features by analyzing the whole genome. The whole-genome phylogenetic tree was constructed based on single-nucleotide polymorphism (SNP) distance calculations and then grouped by distance similarity. The phylogenetic tree shows inconsistencies with geographic origins, thus suggesting that the variations in geographical origins cannot explain the phylogenetic relationships among the 21 T. whipplei strains. The two Chinese strains were closely related to each other, and also found to be related to strains from Germany (T. whipplei TW08/27) and France (T. whipplei Bcu26 and T. whipplei Neuro1). Furthermore, the Average Nucleotide Identity (ANI) matrix also showed no association between geographic origins and genomic similarities. The pan-genome analysis revealed that T. whipplei has a closed pan-genome composed of big core-genomes and small accessory genomes, like other intracellular bacteria. By examining the genotypes of the sequenced strains, all 21 T. whipplei strains were found to be resistant to fluoroquinolones, due to the genetic mutations in genes gyrA, gyrB, parC, and parE. The 21 T. whipplei strains shared the same virulence factors, except for the alpC gene, which existed in 7 out of the 21 T. whipplei strains. When comparing 21 entire T. 
whipplei pan-genomes from various nations, it was discovered that the bacterium also possessed a closed genome, which was a trait shared by intracellular pathogens.}, } @article {pmid36187939, year = {2022}, author = {Awori, RM}, title = {Nematophilic bacteria associated with entomopathogenic nematodes and drug development of their biomolecules.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {993688}, pmid = {36187939}, issn = {1664-302X}, abstract = {The importance of Xenorhabdus and Photorhabdus symbionts to their respective Steinernema and Heterorhabditis nematode hosts is that they not only contribute to their entomopathogenicity but also to their fecundity through the production of small molecules. Thus, this mini-review gives a brief introductory overview of these nematophilic bacteria. Specifically, their type species, nematode hosts, and geographic region of isolations are tabulated. The use of nucleotide sequence-based techniques for their species delineation and how pangenomes can improve this are highlighted. Using the Steinernema-Xenorhabdus association as an example, the bacterium-nematode lifecycle is visualized with an emphasis on the role of bacterial biomolecules. Those currently in drug development are discussed, and two potential antimalarial lead compounds are highlighted. Thus, this mini-review tabulates forty-eight significant nematophilic bacteria and visualizes the ecological importance of their biomolecules. It further discusses three of these biomolecules that are currently in drug development. 
Through it, one is introduced to Xenorhabdus and Photorhabdus bacteria, their natural production of biomolecules in the nematode-bacterium lifecycle, and how these molecules are useful in developing novel therapies.}, } @article {pmid36186424, year = {2022}, author = {Walker, AR and Shields, RC}, title = {Investigating CRISPR spacer targets and their impact on genomic diversification of Streptococcus mutans.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {997341}, pmid = {36186424}, issn = {1664-8021}, support = {R03 DE029882/DE/NIDCR NIH HHS/United States ; }, abstract = {CRISPR-Cas is a bacterial immune system that restricts the acquisition of mobile DNA elements. These systems provide immunity against foreign DNA by encoding CRISPR spacers that help target DNA if it re-enters the cell. In this way, CRISPR spacers are a type of molecular tape recorder of foreign DNA encountered by the host microorganism. Here, we extracted ∼8,000 CRISPR spacers from a collection of over three hundred Streptococcus mutans genomes. Phage DNA is a major target of S. mutans spacers. S. mutans strains have also generated immunity against mobile DNA elements such as plasmids and integrative and conjugative elements. There may also be considerable immunity generated against bacterial DNA, although the relative contribution of self-targeting versus bona fide intra- or inter-species targeting needs to be investigated further. While there was clear evidence that these systems have acquired immunity against foreign DNA, there appeared to be minimal impact on horizontal gene transfer (HGT) constraints on a species-level. There was little or no impact on genome size, GC content and 'openness' of the pangenome when comparing between S. mutans strains with low or high CRISPR spacer loads. In summary, while there is evidence of CRISPR spacer acquisition against self and foreign DNA, CRISPR-Cas does not act as a barrier on the expansion of the S. 
mutans accessory genome.}, } @article {pmid36185029, year = {2022}, author = {Wang, S and Sun, L and Narsing Rao, MP and Fang, BZ and Li, WJ}, title = {Comparative Genome Analysis of a Novel Alkaliphilic Actinobacterial Species Nesterenkonia haasae.}, journal = {Polish journal of microbiology}, volume = {71}, number = {3}, pages = {453-461}, pmid = {36185029}, issn = {2544-4646}, mesh = {Adenosine Triphosphate ; Base Composition ; DNA, Bacterial/genetics ; Fatty Acids ; Membrane Transport Proteins/genetics ; *Nitrates ; Nitrite Reductases/genetics ; Nucleic Acid Hybridization ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Sulfites ; *Thiosulfates ; Urea ; }, abstract = {In the present study, a comparative genome analysis of the novel alkaliphilic actinobacterial Nesterenkonia haasae with other members of the genus Nesterenkonia was performed. The genome size of Nesterenkonia members ranged from 2,188,008 to 3,676,111 bp. N. haasae and Nesterenkonia members of the present study encode the essential glycolysis and pentose phosphate pathway genes. In addition, some Nesterenkonia members encode the crucial genes for Entner-Doudoroff pathways. Some Nesterenkonia members possess the genes responsible for sulfate/thiosulfate transport system permease protein/ ATP-binding protein and conversion of sulfate to sulfite. Nesterenkonia members also encode the genes for assimilatory nitrate reduction, nitrite reductase, and the urea cycle. All Nesterenkonia members have the genes to overcome environmental stress and produce secondary metabolites. The present study helps to understand N. haasae and Nesterenkonia members' environmental adaptation and niches specificity based on their specific metabolic properties. 
Further, based on genome analysis, we propose reclassifying Nesterenkonia jeotgali as a later heterotypic synonym of Nesterenkonia sandarakina.}, } @article {pmid36183960, year = {2022}, author = {Sharma, V and Sood, A and Ray, P and Angrup, A}, title = {Comparative genomics reveals the evolution of antimicrobial resistance in Bacteroides nordii.}, journal = {Microbial pathogenesis}, volume = {173}, number = {Pt A}, pages = {105811}, doi = {10.1016/j.micpath.2022.105811}, pmid = {36183960}, issn = {1096-1208}, mesh = {*Anti-Bacterial Agents/pharmacology ; Drug Resistance, Multiple, Bacterial ; Metronidazole ; Drug Resistance, Bacterial/genetics ; Genomics ; *Anti-Infective Agents/pharmacology ; }, abstract = {Bacteroides nordii, is an understudied member of the pathogenic B. fragilis group which comprises several multidrug-resistant (MDR) strains. Thus, it is of great interest to study the genome biology of Bacteroides nordii. However, no detailed study is available that characterized B. nordii at the genetic level and explored its role as a potential pathogen. We isolated an MDR strain viz., B. nordii PGMM4098 from the pus sample and subjected it to whole genome sequencing using Illumina technology. The draft genome was de-novo assembled and annotated, followed by comprehensive comparative genomics analyses using the publicly available genome dataset of B. nordii. The pan-genome analysis revealed the open nature of B. nordii, indicating the continuous accumulation of novel genes in non-core components leading to the emergence of new strains of this species. The thirteen antimicrobial resistance (AMR) genes identified in the genomes of all B. nordii strains were part of the non-core component of the pan-genome. Of these, four AMR genes, nimE, aadS, mef(En2), and ermB/F/G were found to be acquired via the process of horizontal gene transfer (HGT) from anaerobic Bacteroidetes. 
Importantly, the nimE gene conferring metronidazole resistance was found to be present only in B. nordii PGMM4098, which harbors five other AMR genes encoded in its genome. Of these, nimE (metronidazole resistance), ermB/F/G (macrolide-lincosamide-streptogramin B resistance), and cfxA2/A3 (class A β-lactam resistance) genes were further validated using targeted polymerase chain reaction assay. Notably, these three genes were also found to be under the operation of positive selective pressure suggesting the diversification of these genes, which might lead to the emergence of new MDR strains of B. nordii in the near future. Our study reported and characterized the genome of the first MDR strain of B. nordii and revealed the AMR evolution in this species using a comprehensive comparative genomics approach.}, } @article {pmid36183957, year = {2022}, author = {Yan, S and Liu, X and Li, C and Jiang, Z and Li, D and Zhu, L}, title = {Genomic virulence genes profile analysis of Salmonella enterica isolates from animal and human in China from 2004 to 2019.}, journal = {Microbial pathogenesis}, volume = {173}, number = {Pt A}, pages = {105808}, doi = {10.1016/j.micpath.2022.105808}, pmid = {36183957}, issn = {1096-1208}, mesh = {Animals ; Humans ; *Salmonella enterica ; Virulence/genetics ; *Salmonella Infections, Animal/epidemiology ; Virulence Factors/genetics ; Salmonella ; Genomics ; }, abstract = {Salmonella is a momentously zoonotic and food-borne pathogen that seriously threats human and animal health around the world. Salmonella pathogenicity is closely related to its virulence genes profile. However, conventional virulence gene analysis methods cannot truly reveal whole virulence genes carried by Salmonella. In this study, whole genome sequencing in combination with Virulence Factor Database were applied to investigate whole virulence gene profiles of 243 Salmonella isolates from animals and humans in China from 2004 to 2019. 
The results showed that a total of 670 virulence genes were identified in Salmonella, among them, 319 virulence genes were found in all the Salmonella tested isolates, and 9 virulence genes were unique to Salmonella. The 670 virulence genes were classified into 14 categories according to their functions, and the genes related to adherence, effector delivery system, immune modulation, motility and nutritional/metabolic factors accounted for 84.63%. Relationships between virulence genes and serovars, sequence types indicated that strains belonged to the same serovar or sequence type had similar virulence genes profiles, however, isolates from different sources, years and locations of isolation had variable virulence gene profiles. In addition, copy number of virulence genes and homologous virulence genes shared with other pathogens were also analyzed in this study. In summary, we investigated pan-genomic virulence gene profiles and molecular epidemiology of Salmonella isolates from humans and animals in China from 2004 to 2019. These findings are beneficial for pathogenic monitoring, investigation of virulence evolution as well as prevention and control of Salmonella.}, } @article {pmid36179091, year = {2022}, author = {Sirén, J and Paten, B}, title = {GBZ file format for pangenome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {22}, pages = {5012-5018}, pmid = {36179091}, issn = {1367-4811}, support = {R01 HG010485/HG/NHGRI NIH HHS/United States ; U01HG010961/NH/NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; OT2 OD026682/OD/NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; }, mesh = {High-Throughput Nucleotide Sequencing ; Software ; *Data Compression ; *Libraries ; }, abstract = {MOTIVATION: Pangenome graphs representing aligned genome assemblies are being shared in the text-based Graphical Fragment Assembly format. 
As the number of assemblies grows, there is a need for a file format that can store the highly repetitive data space efficiently.

RESULTS: We propose the GBZ file format based on data structures used in the Giraffe short-read aligner. The format provides good compression, and the files can be efficiently loaded into in-memory data structures. We provide compression and decompression tools and libraries for using GBZ graphs, and we show that they can be efficiently used on a variety of systems.

AVAILABILITY AND IMPLEMENTATION: C++ and Rust implementations are available at https://github.com/jltsiren/gbwtgraph and https://github.com/jltsiren/gbwt-rs, respectively.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid36169590, year = {2022}, author = {Abbas, M and Abid, MA and Meng, Z and Abbas, M and Wang, P and Lu, C and Askari, M and Akram, U and Ye, Y and Wei, Y and Wang, Y and Guo, S and Liang, C and Zhang, R}, title = {Integrating advancements in root phenotyping and genome-wide association studies to open the root genetics gateway.}, journal = {Physiologia plantarum}, volume = {174}, number = {6}, pages = {e13787}, doi = {10.1111/ppl.13787}, pmid = {36169590}, issn = {1399-3054}, support = {31771850//National Natural Science Foundation of China/ ; 32072115//National Natural Science Foundation of China/ ; //National Program for Support of Top-Notch Young Professionals/ ; //"Outstanding Agricultural Science Talent" of CAAS/ ; //Agricultural Science and Technology Innovation Program of Chinese Academy of Agricultural Sciences/ ; }, mesh = {*Quantitative Trait Loci ; *Genome-Wide Association Study ; Plant Roots/genetics ; Phenotype ; Plants/genetics ; }, abstract = {Plant adaptation to challenging environmental conditions around the world has made root growth and development an important research area for plant breeders and scientists. Targeted manipulation of root system architecture (RSA) to increase water and nutrient use efficiency can minimize the adverse effects of climate change on crop production. However, phenotyping of RSA is a major bottleneck since the roots are hidden in the soil. Recently the development of 2- and 3D root imaging techniques combined with the genome-wide association studies (GWASs) have opened up new research tools to identify the genetic basis of RSA. These approaches provide a comprehensive understanding of the RSA, by accelerating the identification and characterization of genes involved in root growth and development. 
This review summarizes the latest developments in phenotyping techniques and GWAS for RSA, which are used to map important genes regulating various aspects of RSA under varying environmental conditions. Furthermore, we discussed about the state-of-the-art image analysis tools integrated with various phenotyping platforms for investigating and quantifying root traits with the highest phenotypic plasticity in both artificial and natural environments which were used for large scale association mapping studies, leading to the identification of RSA phenotypes and their underlying genetics with the greatest potential for RSA improvement. In addition, challenges in root phenotyping and GWAS are also highlighted, along with future research directions employing machine learning and pan-genomics approaches.}, } @article {pmid36169225, year = {2023}, author = {Zhang, Y and Zhang, H and Zhang, Z and Qian, Q and Zhang, Z and Xiao, J}, title = {ProPan: a comprehensive database for profiling prokaryotic pan-genome dynamics.}, journal = {Nucleic acids research}, volume = {51}, number = {D1}, pages = {D767-D776}, pmid = {36169225}, issn = {1362-4962}, mesh = {Archaea/genetics ; Bacteria/genetics ; *Genome ; Genome, Bacterial ; Genomics ; *Prokaryotic Cells ; *Databases, Genetic ; }, abstract = {Compared with conventional comparative genomics, the recent studies in pan-genomics have provided further insights into species genomic dynamics, taxonomy and identification, pathogenicity and environmental adaptation. To better understand genome characteristics of species of interest and to fully excavate key metabolic and resistant genes and their conservations and variations, here we present ProPan (https://ngdc.cncb.ac.cn/propan), a public database covering 23 archaeal species and 1,481 bacterial species (in a total of 51,882 strains) for comprehensively profiling prokaryotic pan-genome dynamics. 
By analyzing and integrating these massive datasets, ProPan offers three major aspects for the pan-genome dynamics of the species of interest: 1) the evaluations of various species' characteristics and composition in pan-genome dynamics; 2) the visualization of map association, the functional annotation and presence/absence variation for all contained species' gene clusters; 3) the typical characteristics of the environmental adaptation, including resistance genes prediction of 126 substances (biocide, antimicrobial drug and metal) and evaluation of 31 metabolic cycle processes. Besides, ProPan develops a very user-friendly interface, flexible retrieval and multi-level real-time statistical visualization. Taken together, ProPan will serve as a weighty resource for the studies of prokaryotic pan-genome dynamics, taxonomy and identification as well as environmental adaptation.}, } @article {pmid36166154, year = {2023}, author = {Duarte, IO and Hissa, DC and Quintela, BCSF and Rabelo, MC and Oliveira, FADS and Lima, NCB and Melo, VMM}, title = {Genomic Analysis of Surfactant-Producing Bacillus vallismortis TIM68: First Glimpse at Species Pangenome and Prediction of New Plipastatin-Like Lipopeptide.}, journal = {Applied biochemistry and biotechnology}, volume = {195}, number = {2}, pages = {753-771}, pmid = {36166154}, issn = {1559-0291}, support = {312462/2017-4//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; }, mesh = {*Lipopeptides/pharmacology ; Surface-Active Agents/pharmacology/chemistry ; *Bacillus/genetics ; Genomics ; }, abstract = {Surfactants are applied in several industrial processes when the modification of interface activity and the stability of colloidal systems are required. Lipopeptides are a class of microbial biosurfactants produced by species of the Bacillus genus. The present study aimed at assembling and analyzing the genome of a new Bacillus vallismortis strain, TIM68, that was shown to produce surfactant lipopeptides. 
The draft genome was also screened for common virulence factors and antibiotics resistance genes to investigate the strain biosafety. Comparative genomics analyses, i.e., synteny, average nucleotide identity (ANI), and pangenome, were also carried out using strain TIM68 and publicly available B. vallismortis complete and partial genomes. Three peptide synthetase operons were found in TIM68 genome, and they were surfactin A, mojavensin, and a novel plipastatin-like lipopeptide named vallisin. No virulence factors that render pathogenicity to the strain have been identified, but a region of prophage, that may contain unknown pathogenic factors, has been predicted. The pangenome of the species was characterized as closed, with 57% of genes integrating the core genome. The results obtained here on the genetic potential of TIM68 strain should contribute to its exploration in biotechnological applications.}, } @article {pmid36159272, year = {2022}, author = {Cherchame, E and Ilango, G and Noël, V and Cadel-Six, S}, title = {Polyphyly in widespread Salmonella enterica serovars and using genomic proximity to choose the best reference genome for bioinformatics analyses.}, journal = {Frontiers in public health}, volume = {10}, number = {}, pages = {963188}, pmid = {36159272}, issn = {2296-2565}, mesh = {Computational Biology ; Genomics ; Humans ; Multilocus Sequence Typing ; Phylogeny ; Salmonella ; *Salmonella enterica/genetics ; Serogroup ; United States ; }, abstract = {Salmonella is the most common cause of gastroenteritis in the world. Over the past 5 years, whole-genome analysis has led to the high-resolution characterization of clinical and foodborne Salmonella responsible for typhoid fever, foodborne illness or contamination of the agro-food chain. 
Whole-genome analyses are simplified by the availability of high-quality, complete genomes for mapping analysis and for calculating the pairwise distance between genomes, but unfortunately some difficulties may still remain. For some serovars, the complete genome is not available, or some serovars are polyphyletic and knowing the serovar alone is not sufficient for choosing the most appropriate reference genome. For these serovars, it is essential to identify the genetically closest complete genome to be able to carry out precise genome analyses. In this study, we explored the genomic proximity of 650 genomes of the 58 Salmonella enterica subsp. enterica serovars most frequently isolated in humans and from the food chain in the United States (US) and in Europe (EU), with a special focus on France. For each serovar, to take into account their genomic diversity, we included all the multilocus sequence type (MLST) profiles represented in EnteroBase with 10 or more genomes (on 19 July 2021). A phylogenetic analysis using both core- and pan-genome approaches was carried out to identify the genomic proximity of all the Salmonella studied and 20 polyphyletic serovars that have not yet been described in the literature. This study determined the genetic proximity between all 58 serovars studied and revealed polyphyletic serovars, their genomic lineages and MLST profiles. Finally, we enhanced the open-access databases with 73 new genomes and produced a list of high-quality complete reference genomes for 48 S. enterica subsp. 
enterica serovars among the most isolated in the US, EU, and France.}, } @article {pmid36156193, year = {2022}, author = {Smith-Zaitlik, T and Shibu, P and McCartney, AL and Foster, G and Hoyles, L and Negus, D}, title = {Extended genomic analyses of the broad-host-range phages vB_KmiM-2Di and vB_KmiM-4Dii reveal slopekviruses have highly conserved genomes.}, journal = {Microbiology (Reading, England)}, volume = {168}, number = {9}, pages = {}, doi = {10.1099/mic.0.001247}, pmid = {36156193}, issn = {1465-2080}, support = {MR/L01632X/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {*Anti-Infective Agents ; *Bacteriophages/genetics ; Endonucleases ; Genome, Viral ; Genomics/methods ; Host Specificity ; Sewage ; Water ; }, abstract = {High levels of antimicrobial resistance among members of the Klebsiella oxytoca complex (KoC) have led to renewed interest in the use of bacteriophage (phage) therapy to tackle infections caused by these bacteria. In this study we characterized two lytic phages, vB_KmiM-2Di and vB_KmiM-4Dii, that were isolated from sewage water against two GES-5-positive Klebsiella michiganensis strains (PS_Koxy2 and PS_Koxy4, respectively). ViPTree analysis showed both phages belonged to the genus Slopekvirus. rpoB gene-based sequence analysis of 108 presumptive K. oxytoca isolates (n=59 clinical, n=49 veterinary) found K. michiganensis to be more prevalent (46 % clinical and 43 % veterinary, respectively) than K. oxytoca (40 % clinical and 6 % veterinary, respectively). Host range analysis against these 108 isolates found both vB_KmiM-2Di and vB_KmiM-4Dii showed broad lytic activity against KoC species. Several hypothetical homing endonuclease genes were encoded within the genomes of both phages, which may contribute to their broad host range. Differences in the tail fibre protein may explain the non-identical host range of the two phages. 
Pangenome analysis of 24 slopekviruses found that genomes within this genus are highly conserved, with more than 50 % of all predicted coding sequences representing core genes at ≥95 % identity and ≥70 % coverage. Given their broad host ranges, our results suggest vB_KmiM-2Di and vB_KmiM-4Dii represent attractive potential therapeutics. In addition, current recommendations for phage-based pangenome analyses may require revision.}, } @article {pmid36153338, year = {2022}, author = {Tong, X and Han, MJ and Lu, K and Tai, S and Liang, S and Liu, Y and Hu, H and Shen, J and Long, A and Zhan, C and Ding, X and Liu, S and Gao, Q and Zhang, B and Zhou, L and Tan, D and Yuan, Y and Guo, N and Li, YH and Wu, Z and Liu, L and Li, C and Lu, Y and Gai, T and Zhang, Y and Yang, R and Qian, H and Liu, Y and Luo, J and Zheng, L and Lou, J and Peng, Y and Zuo, W and Song, J and He, S and Wu, S and Zou, Y and Zhou, L and Cheng, L and Tang, Y and Cheng, G and Yuan, L and He, W and Xu, J and Fu, T and Xiao, Y and Lei, T and Xu, A and Yin, Y and Wang, J and Monteiro, A and Westhof, E and Lu, C and Tian, Z and Wang, W and Xiang, Z and Dai, F}, title = {High-resolution silkworm pan-genome provides genetic insights into artificial selection and ecological adaptation.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {5619}, pmid = {36153338}, issn = {2041-1723}, mesh = {Animals ; *Bombyx/genetics ; *Diapause ; Domestication ; Genomics ; Silk/genetics ; }, abstract = {The silkworm Bombyx mori is an important economic insect for producing silk, the "queen of fabrics". The currently available genomes limit the understanding of its genetic diversity and the discovery of valuable alleles for breeding. Here, we deeply re-sequence 1,078 silkworms and assemble long-read genomes for 545 representatives. We construct a high-resolution pan-genome dataset representing almost the entire genomic content in the silkworm. 
We find that the silkworm population harbors a high density of genomic variants and identify 7308 new genes, 4260 (22%) core genes, and 3,432,266 non-redundant structure variations (SVs). We reveal hundreds of genes and SVs that may contribute to the artificial selection (domestication and breeding) of silkworm. Further, we focus on four genes responsible, respectively, for two economic (silk yield and silk fineness) and two ecologically adaptive traits (egg diapause and aposematic coloration). Taken together, our population-scale genomic resources will promote functional genomics studies and breeding improvement for silkworm.}, } @article {pmid36146509, year = {2022}, author = {Phanse, Y and Puttamreddy, S and Loy, D and Ramirez, JV and Ross, KA and Alvarez-Castro, I and Mogler, M and Broderick, S and Rajan, K and Narasimhan, B and Bartholomay, LC}, title = {RNA Nanovaccine Protects against White Spot Syndrome Virus in Shrimp.}, journal = {Vaccines}, volume = {10}, number = {9}, pages = {}, pmid = {36146509}, issn = {2076-393X}, support = {2015-67021-23309//United States Department of Agriculture/ ; 114370//United States Department of Agriculture/ ; }, abstract = {In the last 15 years, crustacean fisheries have experienced billions of dollars in economic losses, primarily due to viral diseases caused by such pathogens as white spot syndrome virus (WSSV) in the Pacific white shrimp Litopenaeus vannamei and Asian tiger shrimp Penaeus monodon. To date, no effective measures are available to prevent or control disease outbreaks in these animals, despite their economic importance. Recently, double-stranded RNA-based vaccines have been shown to provide specific and robust protection against WSSV infection in cultured shrimp. However, the limited stability of double-stranded RNA is the most significant hurdle for the field application of these vaccines with respect to delivery within an aquatic system. 
Polyanhydride nanoparticles have been successfully used for the encapsulation and release of vaccine antigens. We have developed a double-stranded RNA-based nanovaccine for use in shrimp disease control with emphasis on the Pacific white shrimp L. vannamei. Nanoparticles based on copolymers of sebacic acid, 1,6-bis(p-carboxyphenoxy)hexane, and 1,8-bis(p-carboxyphenoxy)-3,6-dioxaoctane exhibited excellent safety profiles, as measured by shrimp survival and histological evaluation. Furthermore, the nanoparticles localized to tissue target replication sites for WSSV and persisted through 28 days postadministration. Finally, the nanovaccine provided ~80% protection in a lethal WSSV challenge model. This study demonstrates the exciting potential of a safe, effective, and field-applicable RNA nanovaccine that can be rationally designed against infectious diseases affecting aquaculture.}, } @article {pmid36144349, year = {2022}, author = {Gontijo, JB and Paula, FS and Venturini, AM and Mandro, JA and Bodelier, PLE and Tsai, SM}, title = {Insights into the Genomic Potential of a Methylocystis sp. from Amazonian Floodplain Sediments.}, journal = {Microorganisms}, volume = {10}, number = {9}, pages = {}, pmid = {36144349}, issn = {2076-2607}, support = {2014/50320-4, 2017/26138-0, 2018/14974-0, 2019/25924-7, and 2019/25931-3//São Paulo Research Foundation/ ; }, abstract = {Although floodplains are recognized as important sources of methane (CH4) in the Amazon basin, little is known about the role of methanotrophs in mitigating CH4 emissions in these ecosystems. Our previous data reported the genus Methylocystis as one of the most abundant methanotrophs in these floodplain sediments. However, information on the functional potential and life strategies of these organisms living under seasonal flooding is still missing. Here, we described the first metagenome-assembled genome (MAG) of a Methylocystis sp. 
recovered from Amazonian floodplains sediments, and we explored its functional potential and ecological traits through phylogenomic, functional annotation, and pan-genomic approaches. Both phylogenomics and pan-genomics identified the closest placement of the bin.170_fp as Methylocystis parvus. As expected for Type II methanotrophs, the Core cluster from the pan-genome comprised genes for CH4 oxidation and formaldehyde assimilation through the serine pathway. Furthermore, the complete set of genes related to nitrogen fixation is also present in the Core. Interestingly, the MAG singleton cluster revealed the presence of unique genes related to nitrogen metabolism and cell motility. The study sheds light on the genomic characteristics of a dominant, but as yet unexplored methanotroph from the Amazonian floodplains. By exploring the genomic potential related to resource utilization and motility capability, we expanded our knowledge on the niche breadth of these dominant methanotrophs in the Amazonian floodplains.}, } @article {pmid36142806, year = {2022}, author = {Guche, MD and Pilati, S and Trenti, F and Dalla Costa, L and Giorni, P and Guella, G and Marocco, A and Lanubile, A}, title = {Functional Study of Lipoxygenase-Mediated Resistance against Fusarium verticillioides and Aspergillus flavus Infection in Maize.}, journal = {International journal of molecular sciences}, volume = {23}, number = {18}, pages = {}, pmid = {36142806}, issn = {1422-0067}, support = {protocol n. 
5385//Cremona Agri-Food Technologies (CRAFT)/ ; 35th PhD in AgriFood and Environmental Sciences//Fondazione Edmund Mach/ ; }, mesh = {Aspergillus flavus/genetics/metabolism ; *Fumonisins ; *Fusarium/metabolism ; Lipoxygenase/genetics/metabolism ; *Mycotoxins/metabolism ; Oxylipins/metabolism ; Plant Diseases/genetics/microbiology ; Triglycerides/metabolism ; Zea mays/metabolism ; }, abstract = {Mycotoxin contamination of maize kernels by fungal pathogens like Fusarium verticillioides and Aspergillus flavus is a chronic global challenge impacting food and feed security, health, and trade. Maize lipoxygenase genes (ZmLOXs) synthetize oxylipins that play defense roles and govern host-fungal interactions. The current study investigated the involvement of ZmLOXs in maize resistance against these two fungi. A considerable intraspecific genetic and transcript variability of the ZmLOX family was highlighted by in silico analysis comparing publicly available maize pan-genomes and pan-transcriptomes, respectively. Then, phenotyping and expression analysis of ZmLOX genes along with key genes involved in oxylipin biosynthesis were carried out in a maize mutant carrying a Mu transposon insertion in the ZmLOX4 gene (named UFMulox4) together with Tzi18, Mo17, and W22 inbred lines at 3- and 7-days post-inoculation with F. verticillioides and A. flavus. Tzi18 showed the highest resistance to the pathogens coupled with the lowest mycotoxin accumulation, while UFMulox4 was highly susceptible to both pathogens with the most elevated mycotoxin content. F. verticillioides inoculation determined a stronger induction of ZmLOXs and maize allene oxide synthase genes as compared to A. flavus. Additionally, oxylipin analysis revealed prevalent linoleic (18:2) peroxidation by 9-LOXs, the accumulation of 10-oxo-11-phytoenoic acid (10-OPEA), and triglyceride peroxidation only in F. 
verticillioides inoculated kernels of resistant genotypes.}, } @article {pmid36142798, year = {2022}, author = {Lee, JH and Lee, SR and Han, S and Lee, PC}, title = {Comparative Genomic Analysis of Agarolytic Flavobacterium faecale WV33[T].}, journal = {International journal of molecular sciences}, volume = {23}, number = {18}, pages = {}, pmid = {36142798}, issn = {1422-0067}, support = {2020R1A2C3008889//National Research Foundation of Korea/ ; 20220258//Ministry of Oceans and Fisheries/ ; }, mesh = {Agar ; DNA, Bacterial/genetics ; Fatty Acids/metabolism ; *Flavobacteriaceae ; *Flavobacterium/genetics ; Genomics ; Nucleotides/metabolism ; Phylogeny ; Plastics/metabolism ; RNA/metabolism ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {Flavobacteria are widely dispersed in a variety of environments and produce various polysaccharide-degrading enzymes. Here, we report the complete genome of Flavobacterium faecale WV33[T], an agar-degrading bacterium isolated from the stools of Antarctic penguins. The sequenced genome of F. faecale WV33[T] represents a single circular chromosome (4,621,116 bp, 35.2% G + C content), containing 3984 coding DNA sequences and 85 RNA-coding genes. The genome of F. faecale WV33[T] contains 154 genes that encode carbohydrate-active enzymes (CAZymes). Among the CAZymes, seven putative genes encoding agarases have been identified in the genome. Transcriptional analysis revealed that the expression of these putative agarases was significantly enhanced by the presence of agar in the culture medium, suggesting that these proteins are involved in agar hydrolysis. Pangenome analysis revealed that the genomes of the 27 Flavobacterium type strains, including F. faecale WV33[T], tend to be very plastic, and Flavobacterium strains are unique species with a tiny core genome and a large non-core region. The average nucleotide identity and phylogenomic analysis of the 27 Flavobacterium-type strains showed that F. 
faecale WV33[T] was positioned in a unique clade in the evolutionary tree.}, } @article {pmid36141842, year = {2022}, author = {Ismail, S and Alsowayeh, N and Abbasi, HW and Albutti, A and Tahir Ul Qamar, M and Ahmad, S and Raza, RZ and Sadia, K and Abbasi, SW}, title = {Pan-Genome-Assisted Computational Design of a Multi-Epitopes-Based Vaccine Candidate against Helicobacter cinaedi.}, journal = {International journal of environmental research and public health}, volume = {19}, number = {18}, pages = {}, pmid = {36141842}, issn = {1660-4601}, mesh = {Computational Biology ; Cytokines ; Epitopes, B-Lymphocyte/chemistry ; Epitopes, T-Lymphocyte/chemistry ; *Helicobacter/genetics ; Interferons ; Molecular Docking Simulation ; Toll-Like Receptor 5 ; *Type VI Secretion Systems ; *Vaccines ; }, abstract = {Helicobacter cinaedi is a Gram-negative bacterium from the family Helicobacteraceae and genus Helicobacter. The pathogen is a causative agent of gastroenteritis, cellulitis, and bacteremia. The increasing antibiotic resistance pattern of the pathogen prompts the efforts to develop a vaccine to prevent dissemination of the bacteria and stop the spread of antibiotic resistance (AR) determinants. Herein, a pan-genome analysis of the pathogen strains was performed to shed light on its core genome and its exploration for potential vaccine targets. In total, four vaccine candidates (TonB dependent receptor, flagellar hook protein FlgE, Hcp family type VI secretion system effector, flagellar motor protein MotB) were identified as promising vaccine candidates and subsequently subjected to an epitopes' mapping phase. These vaccine candidates are part of the pathogen core genome: they are essential, localized at the pathogen surface, and are antigenic. Immunoinformatics was further applied on the selected vaccine proteins to predict potential antigenic, non-allergic, non-toxic, virulent, and DRB*0101 epitopes. 
The selected epitopes were then fused using linkers to structure a multi-epitopes' vaccine construct. Molecular docking simulations were conducted to determine a designed vaccine binding stability with TLR5 innate immune receptor. Further, binding free energy by MMGB/PBSA and WaterSwap was employed to examine atomic level interaction energies. The designed vaccine also stimulated strong humoral and cellular immune responses as well as interferon and cytokines' production. In a nutshell, the designed vaccine is promising in terms of immune responses' stimulation and could be an ideal candidate for experimental analysis due to favorable physicochemical properties.}, } @article {pmid36140772, year = {2022}, author = {Socarras, KM and Haslund-Gourley, BS and Cramer, NA and Comunale, MA and Marconi, RT and Ehrlich, GD}, title = {Large-Scale Sequencing of Borreliaceae for the Construction of Pan-Genomic-Based Diagnostics.}, journal = {Genes}, volume = {13}, number = {9}, pages = {}, pmid = {36140772}, issn = {2073-4425}, mesh = {*Borrelia burgdorferi/genetics ; Genome, Bacterial ; Genomics/methods ; Humans ; *Lyme Disease/genetics/microbiology ; Phylogeny ; }, abstract = {The acceleration of climate change has been associated with an alarming increase in the prevalence and geographic range of tick-borne diseases (TBD), many of which have severe and long-lasting effects-particularly when treatment is delayed principally due to inadequate diagnostics and lack of physician suspicion. Moreover, there is a paucity of treatment options for many TBDs that are complicated by diagnostic limitations for correctly identifying the offending pathogens. This review will focus on the biology, disease pathology, and detection methodologies used for the Borreliaceae family which includes the Lyme disease agent Borreliella burgdorferi. 
Previous work revealed that Borreliaceae genomes differ from most bacteria in that they are composed of large numbers of replicons, both linear and circular, with the main chromosome being the linear with telomeric-like termini. While these findings are novel, additional gene-specific analyses of each class of these multiple replicons are needed to better understand their respective roles in metabolism and pathogenesis of these enigmatic spirochetes. Historically, such studies were challenging due to a dearth of both analytic tools and a sufficient number of high-fidelity genomes among the various taxa within this family as a whole to provide for discriminative and functional genomic studies. Recent advances in long-read whole-genome sequencing, comparative genomics, and machine-learning have provided the tools to better understand the fundamental biology and phylogeny of these genomically-complex pathogens while also providing the data for the development of improved diagnostics and therapeutics.}, } @article {pmid36139949, year = {2022}, author = {Hurtado, R and Barh, D and Weimer, BC and Viana, MVC and Profeta, R and Sousa, TJ and Aburjaile, FF and Quino, W and Souza, RP and Mestanza, O and Gavilán, RG and Azevedo, V}, title = {WGS-Based Lineage and Antimicrobial Resistance Pattern of Salmonella Typhimurium Isolated during 2000-2017 in Peru.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {11}, number = {9}, pages = {}, pmid = {36139949}, issn = {2079-6382}, abstract = {Salmonella Typhimurium is associated with foodborne diseases worldwide, including in Peru, and its emerging antibiotic resistance (AMR) is now a global public health problem. Therefore, country-specific monitoring of the AMR emergence is vital to control this pathogen, and in these aspects, whole genome sequence (WGS)—based approaches are better than gene-based analyses. 
Here, we performed the antimicrobial susceptibility test for ten widely used antibiotics and WGS-based various analyses of 90 S. Typhimurium isolates (human, animal, and environment) from 14 cities of Peru isolated from 2000 to 2017 to understand the lineage and antimicrobial resistance pattern of this pathogen in Peru. Our results suggest that the Peruvian isolates are of Typhimurium serovar and predominantly belong to sequence type ST19. Genomic diversity analyses indicate an open pan-genome, and at least ten lineages are circulating in Peru. A total of 48.8% and 31.0% of isolates are phenotypically and genotypically resistant to at least one antibiotic, while 12.0% are multi-drug resistant (MDR). Genotype−phenotype correlations for ten tested drugs show >80% accuracy, and >90% specificity. Sensitivity above 90% was only achieved for ciprofloxacin and ceftazidime. Two lineages exhibit the majority of the MDR isolates. A total of 63 different AMR genes are detected, of which 30 are found in 17 different plasmids. Transmissible plasmids such as IncI-gamma/k, IncI1-I(Alpha), Col(pHAD28), IncFIB, IncHI2, and IncI2 that carry AMR genes associated with third-generation antibiotics are also identified. Finally, three new non-synonymous single nucleotide variations (SNVs) for nalidixic acid and eight new SNVs for nitrofurantoin resistance are predicted using genome-wide association studies, comparative genomics, and functional annotation. Our analysis provides for the first time the WGS-based details of the circulating S.
Typhimurium lineages and their antimicrobial resistance pattern in Peru.}, } @article {pmid36138232, year = {2022}, author = {Yang, T and Liu, R and Luo, Y and Hu, S and Wang, D and Wang, C and Pandey, MK and Ge, S and Xu, Q and Li, N and Li, G and Huang, Y and Saxena, RK and Ji, Y and Li, M and Yan, X and He, Y and Liu, Y and Wang, X and Xiang, C and Varshney, RK and Ding, H and Gao, S and Zong, X}, title = {Improved pea reference genome and pan-genome highlight genomic features and evolutionary characteristics.}, journal = {Nature genetics}, volume = {54}, number = {10}, pages = {1553-1563}, pmid = {36138232}, issn = {1546-1718}, mesh = {Biological Evolution ; Genomics ; *Peas/genetics ; *Plant Breeding ; Quantitative Trait Loci/genetics ; }, abstract = {Complete and accurate reference genomes and annotations provide fundamental resources for functional genomics and crop breeding. Here we report a de novo assembly and annotation of a pea cultivar ZW6 with contig N50 of 8.98 Mb, which features a 243-fold increase in contig length and evident improvements in the continuity and quality of sequence in complex repeat regions compared with the existing one. Genome diversity of 118 cultivated and wild pea demonstrated that Pisum abyssinicum is a separate species different from P. fulvum and P. sativum within Pisum. Quantitative trait locus analyses uncovered two known Mendel's genes related to stem length (Le/le) and seed shape (R/r) as well as some candidate genes for pod form studied by Mendel. A pan-genome of 116 pea accessions was constructed, and pan-genes preferred in P. abyssinicum and P. 
fulvum showed distinct functional enrichment, indicating the potential value of them as pea breeding resources in the future.}, } @article {pmid36131075, year = {2022}, author = {Izydorczyk, C and Waddell, BJ and Weyant, RB and Surette, MG and Somayaji, R and Rabin, HR and Conly, JM and Church, DL and Parkins, MD}, title = {The natural history and genetic diversity of Haemophilus influenzae infecting the airways of adults with cystic fibrosis.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {15765}, pmid = {36131075}, issn = {2045-2322}, mesh = {Adult ; *Cystic Fibrosis/complications ; Genetic Variation ; *Haemophilus Infections ; Haemophilus influenzae ; Humans ; Phylogeny ; Retrospective Studies ; }, abstract = {Haemophilus influenzae is a Gram-negative pathobiont, frequently recovered from the airways of persons with cystic fibrosis (pwCF). Previous studies of H. influenzae infection dynamics and transmission in CF predominantly used molecular methods, lacking resolution. In this retrospective cohort study, representative yearly H. influenzae isolates from all pwCF attending the Calgary Adult CF Clinic with H. influenzae positive sputum cultures between 2002 and 2016 were typed by pulsed-field gel electrophoresis. Isolates with shared pulsotypes common to ≥ 2 pwCF were sequenced by Illumina MiSeq. Phylogenetic and pangenomic analyses were used to assess genetic relatedness within shared pulsotypes, and epidemiological investigations were performed to assess potential for healthcare associated transmission. H. influenzae infection was observed to be common (33% of patients followed) and dynamic in pwCF. Most infected pwCF exhibited serial infections with new pulsotypes (75% of pwCF with ≥ 2 positive cultures), with up to four distinct pulsotypes identified from individual patients. Prolonged infection by a single pulsotype was only rarely observed. 
Intra-patient genetic diversity was observed at the single-nucleotide polymorphism and gene content levels. Seven shared pulsotypes encompassing 39% of pwCF with H. influenzae infection were identified, but there was no evidence, within our sampling scheme, of direct patient-to-patient infection transmission.}, } @article {pmid36125674, year = {2022}, author = {Chávez-Luzanía, RA and Montoya-Martínez, AC and Parra-Cota, FI and de Los Santos-Villalobos, S}, title = {Pangenomes-identified singletons for designing specific primers to identify bacterial strains in a plant growth-promoting consortium.}, journal = {Molecular biology reports}, volume = {49}, number = {11}, pages = {10489-10498}, pmid = {36125674}, issn = {1573-4978}, support = {PROFAPI 2022_0001//ITSON/ ; }, mesh = {*Plant Development ; *Bacteria/genetics ; Triticum/genetics/microbiology ; Whole Genome Sequencing ; Crops, Agricultural/genetics ; }, abstract = {BACKGROUND: The use of plant growth-promoting microorganisms represents a sustainable way to increase agricultural yields and plant health. Thus, the identification and tracking of these microorganisms are determinants for validating their positive effects on crops. Pangenomes allow the identification of singletons that can be used to design specific primers for the detection of the studied strains.

OBJECTIVE: This study aimed to establish a strategy based on the use of whole-genome sequencing and pangenomes for designing and validating primer sets for detecting Bacillus cabrialesii TE3[T], Priestia megaterium TRQ8, and Bacillus paralicheniformis TRQ65, a promising beneficial bacterial consortium for wheat.

METHODS AND RESULTS: The identification of singletons of TE3[T], TRQ8, and TRQ65 was performed by pangenomes using the Kbase platform and subsequently analyzed using BLAST®. The identified DNA regions were used for primer design in AlleleID version 7. Primers were validated by multiplex PCR using pure template DNA from each studied strain, combinations of two or three DNA from these strains, and DNA from agricultural soil samples enriched (and not) with the bacterial consortium. Here, we report the first design of primers capable of detecting and identifying the beneficial strains TE3[T], TRQ8, and TRQ65.

CONCLUSIONS: The use of pangenomes allowed the distinction of unique sequences that enables the design of primers for specific identification of the studied bacterial strains. This strategy can be widely used for the design of primer sets to detect other strains of interest for combating biopiracy, and commercial protection of biological products, among other applications.}, } @article {pmid36124775, year = {2022}, author = {Li, T and Yin, Y}, title = {Critical assessment of pan-genomic analysis of metagenome-assembled genomes.}, journal = {Briefings in bioinformatics}, volume = {23}, number = {6}, pages = {}, pmid = {36124775}, issn = {1477-4054}, support = {58-8042-7-089//United States Department of Agriculture/ ; R21 AI171952/AI/NIAID NIH HHS/United States ; DBI-1933521//National Science Foundation/ ; R01 GM140370/GM/NIGMS NIH HHS/United States ; 2019-YIN//Nebraska Tobacco Settlement Biomedical Research Enhancement Funds/ ; }, mesh = {*Metagenome ; Phylogeny ; *Genome, Bacterial ; Genomics ; Sequence Analysis, DNA/methods ; Metagenomics/methods ; }, abstract = {Pan-genome analyses of metagenome-assembled genomes (MAGs) may suffer from the known issues with MAGs: fragmentation, incompleteness and contamination. Here, we conducted a critical assessment of pan-genomics of MAGs, by comparing pan-genome analysis results of complete bacterial genomes and simulated MAGs. We found that incompleteness led to significant core gene (CG) loss. The CG loss remained when using different pan-genome analysis tools (Roary, BPGA, Anvi'o) and when using a mixture of MAGs and complete genomes. Contamination had little effect on core genome size (except for Roary due to its gene clustering issue) but had major influence on accessory genomes. Importantly, the CG loss was partially alleviated by lowering the CG threshold and using gene prediction algorithms that consider fragmented genes, but to a lesser degree when incompleteness was higher than 5%.
The CG loss also led to incorrect pan-genome functional predictions and inaccurate phylogenetic trees. Our main findings were supported by a study of real MAG-isolate genome data. We conclude that lowering CG threshold and predicting genes in metagenome mode (as Anvi'o does with Prodigal) are necessary in pan-genome analysis of MAGs. Development of new pan-genome analysis tools specifically for MAGs are needed in future studies.}, } @article {pmid36123438, year = {2022}, author = {Vassallo, CN and Doering, CR and Littlehale, ML and Teodoro, GIC and Laub, MT}, title = {A functional selection reveals previously undetected anti-phage defence systems in the E. coli pangenome.}, journal = {Nature microbiology}, volume = {7}, number = {10}, pages = {1568-1579}, pmid = {36123438}, issn = {2058-5276}, support = {F32 GM139231/GM/NIGMS NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; }, mesh = {Antiviral Agents ; *Bacteriophages/genetics ; CRISPR-Cas Systems ; Escherichia coli/genetics ; Prophages/genetics ; }, abstract = {The ancient, ongoing coevolutionary battle between bacteria and their viruses, bacteriophages, has given rise to sophisticated immune systems including restriction-modification and CRISPR-Cas. Many additional anti-phage systems have been identified using computational approaches based on genomic co-location within defence islands, but these screens may not be exhaustive. Here we developed an experimental selection scheme agnostic to genomic context to identify defence systems in 71 diverse E. coli strains. Our results unveil 21 conserved defence systems, none of which were previously detected as enriched in defence islands. Additionally, our work indicates that intact prophages and mobile genetic elements are primary reservoirs and distributors of defence systems in E. coli, with defence systems typically carried in specific locations or hotspots. 
These hotspots encode dozens of additional uncharacterized defence system candidates. Our findings reveal an extended landscape of antiviral immunity in E. coli and provide an approach for mapping defence systems in other species.}, } @article {pmid36109518, year = {2022}, author = {Yu, Y and Zhang, Z and Dong, X and Yang, R and Duan, Z and Xiang, Z and Li, J and Li, G and Yan, F and Xue, H and Jiao, D and Lu, J and Lu, H and Zhang, W and Wei, Y and Fan, S and Li, J and Jia, J and Zhang, J and Ji, J and Liu, P and Lu, H and Zhao, H and Chen, S and Wei, C and Chen, H and Zhu, Z}, title = {Pangenomic analysis of Chinese gastric cancer.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {5412}, pmid = {36109518}, issn = {2041-1723}, mesh = {Asian People/genetics ; China ; Genome, Human ; Humans ; Lectins/genetics ; Receptors, Cell Surface/genetics ; *Stomach Neoplasms/genetics ; }, abstract = {Pangenomic study might improve the completeness of human reference genome (GRCh38) and promote precision medicine. Here, we use an automated pipeline of human pangenomic analysis to build gastric cancer pan-genome for 185 paired deep sequencing data (370 samples), and characterize the gene presence-absence variations (PAVs) at whole genome level. Genes ACOT1, GSTM1, SIGLEC14 and UGT2B17 are identified as highly absent genes in gastric cancer population. A set of genes from unaligned sequences with GRCh38 are predicted. We successfully locate one of predicted genes GC0643 on chromosome 9q34.2. Overexpression of GC0643 significantly inhibits cell growth, cell migration and invasion, cell cycle progression, and induces cell apoptosis in cancer cells. The tumor suppressor functions can be reversed by shGC0643 knockdown. The GC0643 is approved by NCBI database (GenBank: MW194843.1). 
Collectively, the robust pan-genome strategy provides a deeper understanding of the gene PAVs in the human cancer genome.}, } @article {pmid36109150, year = {2022}, author = {Ruggieri, AA and Livraghi, L and Lewis, JJ and Evans, E and Cicconardi, F and Hebberecht, L and Ortiz-Ruiz, Y and Montgomery, SH and Ghezzi, A and Rodriguez-Martinez, JA and Jiggins, CD and McMillan, WO and Counterman, BA and Papa, R and Van Belleghem, SM}, title = {A butterfly pan-genome reveals that a large amount of structural variation underlies the evolution of chromatin accessibility.}, journal = {Genome research}, volume = {32}, number = {10}, pages = {1862-1875}, pmid = {36109150}, issn = {1549-5469}, support = {P20 GM103475/GM/NIGMS NIH HHS/United States ; }, mesh = {Animals ; *Butterflies/genetics ; Chromatin/genetics ; DNA Transposable Elements/genetics ; Genomics ; INDEL Mutation ; Drosophila/genetics ; Evolution, Molecular ; }, abstract = {Despite insertions and deletions being the most common structural variants (SVs) found across genomes, not much is known about how much these SVs vary within populations and between closely related species, nor their significance in evolution. To address these questions, we characterized the evolution of indel SVs using genome assemblies of three closely related Heliconius butterfly species. Over the relatively short evolutionary timescales investigated, up to 18.0% of the genome was composed of indels between two haplotypes of an individual Heliconius charithonia butterfly and up to 62.7% included lineage-specific SVs between the genomes of the most distant species (11 Mya). Lineage-specific sequences were mostly characterized as transposable elements (TEs) inserted at random throughout the genome and their overall distribution was similarly affected by linked selection as single nucleotide substitutions. 
Using chromatin accessibility profiles (i.e., ATAC-seq) of head tissue in caterpillars to identify sequences with potential cis-regulatory function, we found that out of the 31,066 identified differences in chromatin accessibility between species, 30.4% were within lineage-specific SVs and 9.4% were characterized as TE insertions. These TE insertions were localized closer to gene transcription start sites than expected at random and were enriched for sites with significant resemblance to several transcription factor binding sites with known function in neuron development in Drosophila. We also identified 24 TE insertions with head-specific chromatin accessibility. Our results show high rates of structural genome evolution that were previously overlooked in comparative genomic studies and suggest a high potential for structural variation to serve as raw material for adaptive evolution.}, } @article {pmid36107145, year = {2022}, author = {Bhat, SV and Maughan, H and Cameron, ADS and Yost, CK}, title = {Phylogenomic analysis of the genus Delftia reveals distinct major lineages with ecological specializations.}, journal = {Microbial genomics}, volume = {8}, number = {9}, pages = {}, pmid = {36107145}, issn = {2057-5858}, mesh = {Animals ; DNA, Bacterial/genetics ; *Delftia/genetics ; Humans ; *Metals, Heavy ; Phylogeny ; Sequence Analysis, DNA ; Sewage ; Soil ; }, abstract = {Delftia is a diverse betaproteobacterial genus with many strains having agricultural and industrial relevance, including plant-growth promotion, bioremediation of hydrocarbon-contaminated soils, and heavy metal immobilization. Delftia spp. are broadly distributed in the environment, and have been isolated from plant hosts as well as healthy and diseased animal hosts, yet the genetic basis of this ecological versatility has not been characterized.
Here, we present a phylogenomic comparison of published Delftia genomes and show that the genus is divided into two well-supported clades: one 'Delftia acidovorans' clade with isolates from soils and plant rhizospheres, and a second 'Delftia lacustris and Delftia tsuruhatensis' clade with isolates from humans and sludge. The pan-genome inferred from 61 Delftia genomes contained over 28 000 genes, of which only 884 were found in all genomes. Analysis of industrially relevant functions highlighted the ecological versatility of Delftia and supported their role as generalists.}, } @article {pmid36106979, year = {2022}, author = {Jiang, C and Kasai, H and Mino, S and Romalde, JL and Sawabe, T}, title = {The pan-genome of Splendidus clade species in the family Vibrionaceae: Insights into evolution, adaptation, and pathogenicity.}, journal = {Environmental microbiology}, volume = {24}, number = {10}, pages = {4587-4606}, doi = {10.1111/1462-2920.16209}, pmid = {36106979}, issn = {1462-2920}, mesh = {Animals ; Carbohydrates ; Evolution, Molecular ; Genome, Bacterial/genetics ; Phylogeny ; *Vibrionaceae/genetics ; Virulence/genetics ; Virulence Factors/genetics ; Genome ; }, abstract = {The Splendidus clade is the largest clade in Vibrionaceae, and its members are often related to mortality of marine animals with huge economic losses. The molecular bases of their pathogenicity and virulence, however, remain largely unknown. In particular, the complete genome sequences of the Splendidus clade species are rarely registered, which is one of the obstacles to predict core and/or unique genes responsible for their adaptation and pathogenicity, and to perform a fine scale meta-transcriptome during bacterial infection to their hosts. In this study, we obtained the complete genomes of all type strains in the Splendidus clade and revealed that (1) different genome sizes (4.4-5.9 Mb) with V. 
lentus the biggest and most of them had several big plasmids, likely because of the different features on mobilome elements; (2) the Splendidus clade consists of 19 species except V. cortegadensis, and 3 sub-clades (SC) were defined with the 15 most closely related members as SC1; (3) different carbohydrate degradation preferences may be the result of environmental adaptation; and (4) a broad prediction of virulence factors (VFs) revealed core and species unique VF genes.}, } @article {pmid36097170, year = {2022}, author = {Rosconi, F and Rudmann, E and Li, J and Surujon, D and Anthony, J and Frank, M and Jones, DS and Rock, C and Rosch, JW and Johnston, CD and van Opijnen, T}, title = {A bacterial pan-genome makes gene essentiality strain-dependent and evolvable.}, journal = {Nature microbiology}, volume = {7}, number = {10}, pages = {1580-1592}, pmid = {36097170}, issn = {2058-5276}, support = {R01 DE027850/DE/NIDCR NIH HHS/United States ; R21 AI117247/AI/NIAID NIH HHS/United States ; U01 AI124302/AI/NIAID NIH HHS/United States ; R01 AI110724/AI/NIAID NIH HHS/United States ; R01 GM034496/GM/NIGMS NIH HHS/United States ; }, mesh = {*DNA Transposable Elements ; Genes, Essential/genetics ; *Genome, Bacterial/genetics ; Streptococcus pneumoniae/genetics ; Whole Genome Sequencing ; }, abstract = {Many bacterial species are represented by a pan-genome, whose genetic repertoire far outstrips that of any single bacterial genome. Here we investigate how a bacterial pan-genome might influence gene essentiality and whether essential genes that are initially critical for the survival of an organism can evolve to become non-essential. By using Transposon insertion sequencing (Tn-seq), whole-genome sequencing and RNA-seq on a set of 36 clinical Streptococcus pneumoniae strains representative of >68% of the species' pan-genome, we identify a species-wide 'essentialome' that can be subdivided into universal, core strain-specific and accessory essential genes. 
By employing 'forced-evolution experiments', we show that specific genetic changes allow bacteria to bypass essentiality. Moreover, by untangling several genetic mechanisms, we show that gene essentiality can be highly influenced by and/or be dependent on: (1) the composition of the accessory genome, (2) the accumulation of toxic intermediates, (3) functional redundancy, (4) efficient recycling of critical metabolites and (5) pathway rewiring. While this functional characterization underscores the evolvability potential of many essential genes, we also show that genes with differential essentiality remain important antimicrobial drug target candidates, as their inactivation almost always has a severe fitness cost in vivo.}, } @article {pmid36097169, year = {2022}, author = {Beavan, AJS and McInerney, JO}, title = {Gene essentiality evolves across a pangenome.}, journal = {Nature microbiology}, volume = {7}, number = {10}, pages = {1510-1511}, pmid = {36097169}, issn = {2058-5276}, mesh = {*Genomics ; *Software ; }, } @article {pmid36094203, year = {2022}, author = {Batarseh, TN and Morales-Cruz, A and Ingel, B and Roper, MC and Gaut, BS}, title = {Using Genomes and Evolutionary Analyses to Screen for Host-Specificity and Positive Selection in the Plant Pathogen Xylella fastidiosa.}, journal = {Applied and environmental microbiology}, volume = {88}, number = {18}, pages = {e0122022}, pmid = {36094203}, issn = {1098-5336}, mesh = {*Cellulases/genetics ; Histidine Kinase/genetics ; Host Specificity ; Phylogeny ; Plant Diseases/microbiology ; Plants/microbiology ; *Xylella/genetics ; }, abstract = {Xylella fastidiosa infects several economically important crops in the Americas, and it also recently emerged in Europe. 
Here, using a set of Xylella genomes reflective of the genus-wide diversity, we performed a pan-genome analysis based on both core and accessory genes for two purposes: (i) to test associations between genetic divergence and plant host species and (ii) to identify positively selected genes that are potentially involved in arms-race dynamics. For the former, tests yielded significant evidence for the specialization of X. fastidiosa to plant host species. This observation contributes to a growing literature suggesting that the phylogenetic history of X. fastidiosa lineages affects the host range. For the latter, our analyses uncovered evidence of positive selection across codons for 5.3% (67 of 1,257) of the core genes and 5.4% (201 of 3,691) of the accessory genes. These genes are candidates to encode interacting factors with plant and insect hosts. Most of these genes had unknown functions, but we did identify some tractable candidates, including nagZ_2, which encodes a beta-glucosidase that is important for Neisseria gonorrhoeae biofilm formation; cya, which modulates gene expression in pathogenic bacteria, and barA, a membrane associated histidine kinase that has roles in cell division, metabolism, and pili formation. IMPORTANCE Xylella fastidiosa causes devastating diseases to several critical crops. Because X. fastidiosa colonizes and infects many plant species, it is important to understand whether the genome of X. fastidiosa has genetic determinants that underlie specialization to specific host plants. We analyzed genome sequences of X. fastidiosa to investigate evolutionary relationships and to test for evidence of positive selection on specific genes. We found a significant signal between genome diversity and host plants, consistent with bacterial specialization to specific plant hosts.
By screening for positive selection, we identified both core and accessory genes that may affect pathogenicity, including genes involved in biofilm formation.}, } @article {pmid36087828, year = {2023}, author = {Irfan, M and Tariq, M and Basharat, Z and Abid Khan, RM and Jahanzaeb, M and Shakeel, M and Nisa, ZU and Shahzad, M and Jahanzaib, M and Moin, ST and Hassan, SS and Khan, IA}, title = {Genomic analysis of Chryseobacterium indologenes and conformational dynamics of the selected DD-peptidase.}, journal = {Research in microbiology}, volume = {174}, number = {1-2}, pages = {103990}, doi = {10.1016/j.resmic.2022.103990}, pmid = {36087828}, issn = {1769-7123}, mesh = {Humans ; *Serine-Type D-Ala-D-Ala Carboxypeptidase ; Anti-Bacterial Agents/pharmacology/therapeutic use ; *Chryseobacterium/genetics ; Genomics ; }, abstract = {Chryseobacterium indologenes is an emerging MDR pathogen that belongs to the family Flavobacteriaceae. The genome of the C. indologenes, isolated from the nephrotic patient, was sequenced through Illumina MiSeq. The pangenomics of available 56 C. indologenes strains using BPGA revealed an open pangenome (n=5553 CDS), core genome (2141), and accessory genome (2013). The CEG/DEG database identified 662 essential genes that drastically reduced to 68 genes after non-homology analyses towards human and gut microbiome. Further filtering the data for other drug target prioritizing parameters resulted in 32 putative targets. Keeping in view the crucial role played in cell wall biosynthesis, dacB was selected as the final target that encodes D-alanyl-D-alanine carboxypeptidase/endopeptidase (DD-peptidase). The 3D structure of dacB was modelled and rendered to docking analyses against two compound libraries of African plants (n=6842) and Tibetan medicines (n=52). The ADMET profiling exhibited the physicochemical properties of final compounds.
The MD simulations showed the stability of inhibitor-DD-peptidase complex and interactions in terms of RMSD, RMSF, binding free energy calculation and H-bonding. We propose that the novel compounds Leptopene and ZINC95486338 from our findings might be potent DD-peptidase inhibitors that could aid in the development of new antibiotic-resistant therapy for the emerging MDR C. indologenes.}, } @article {pmid36086997, year = {2022}, author = {Fisher, CR and Wilson, M and Scott, JG}, title = {A chromosome-level assembly of the widely used Rockefeller strain of Aedes aegypti, the yellow fever mosquito.}, journal = {G3 (Bethesda, Md.)}, volume = {12}, number = {11}, pages = {}, pmid = {36086997}, issn = {2160-1836}, support = {R21 AI149121/AI/NIAID NIH HHS/United States ; }, mesh = {Animals ; Humans ; *Aedes/genetics ; Mosquito Vectors/genetics ; *Yellow Fever/genetics ; Ecosystem ; Chromosomes ; }, abstract = {Aedes aegypti is the vector of important human diseases, and genomic resources are crucial in facilitating the study of A. aegypti and its ecosystem interactions. Several laboratory-acclimated strains of this mosquito have been established, but the most used strain in toxicology studies is "Rockefeller," which was originally collected and established in Cuba 130 years ago. A full-length genome assembly of another reference strain, "Liverpool," was published in 2018 and is the reference genome for the species (AaegL5). However, genetic studies with the Rockefeller strain are complicated by the availability of only the Liverpool strain as the reference genome. Differences between Liverpool and Rockefeller have been known for decades, particularly in the expression of genes relevant to mosquito behavior and vector control (e.g. olfactory). These differences indicate that AaegL5 is likely not fully representative of the Rockefeller genome, presenting potential impediments to research. 
Here, we present a chromosomal-level assembly and annotation of the Rockefeller genome and a comparative characterization vs the Liverpool genome. Our results set the stage for a pan-genomic approach to understanding evolution and diversity within this important disease vector.}, } @article {pmid36084857, year = {2022}, author = {Ribeiro, IDA and Bach, E and Passaglia, LMP}, title = {Alternative nitrogenase of Paenibacillus sonchi genomovar Riograndensis: An insight in the origin of Fe-nitrogenase in the Paenibacillaceae family.}, journal = {Molecular phylogenetics and evolution}, volume = {177}, number = {}, pages = {107624}, doi = {10.1016/j.ympev.2022.107624}, pmid = {36084857}, issn = {1095-9513}, mesh = {Nitrogen Fixation/genetics ; *Nitrogenase/genetics/metabolism ; *Paenibacillus/genetics/metabolism ; Phylogeny ; }, abstract = {Paenibacillus sonchi genomovar Riograndensis is a nitrogen-fixing bacteria isolated from wheat that displays diverse plant growth-promoting abilities. Beyond conventional Mo-nitrogenase, this organism also harbors an alternative Fe-nitrogenase, whose many aspects related to regulation, physiology, and evolution remain to be elucidated. In this work, the origins of this alternative system were investigated, exploring the distribution and diversification of nitrogenases in the Paenibacillaceae family. Our analysis showed that diazotrophs represent 17% of Paenibacillaceae genomes, of these, only 14.4% (2.5% of all Paenibacillaceae genomes) also contained Fe or V- nitrogenases. Diverse nif-like sequences were also described, occurring mainly in genomes that also harbor the alternative systems. The analysis of genomes containing Fe-nitrogenase showed a conserved cluster of nifEN anfHDGK across three genera: Gorillibacterium, Fontibacillus, and Paenibacillus. A phylogeny of anfHDGK separated the Fe-nitrogenases into three main groups. 
Our analysis suggested that Fe-nitrogenase was acquired by the ancestral lineage of Fontibacillus, Gorillibacterium, and Paenibacillus genera via horizontal gene transfer (HGT), and further events of transfer and gene loss marked the evolution of this alternative nitrogenase in these groups. The species phylogeny of N-fixing Paenibacillaceae separated the diazotrophs into five clades, one of these containing all occurrences of strains harboring alternative nitrogenases in the Paenibacillus genus. The pangenome of this clade is open and composed of more than 96% of accessory genes. Diverse functional categories were enriched in the flexible genome, including functions related to replication and repair. The latter involved diverse genes related to HGT, suggesting that such events may have an important role in the evolution of diazotrophic Paenibacillus. This study provided an insight into the organization, distribution, and evolution of alternative nitrogenase genes in Paenibacillaceae, considering different genomic aspects.}, } @article {pmid36083529, year = {2022}, author = {de Lima Ferreira, JK and de Mello Varani, A and Tótola, MR and Fernandes Almeida, M and de Sousa Melo, D and Ferreira Silva E Batista, C and Chalfun-Junior, A and Pimenta de Oliveira, KK and Wurdig Roesch, LF and Satler Pylro, V}, title = {Phylogenomic characterization and pangenomic insights into the surfactin-producing bacteria Bacillus subtilis strain RI4914.}, journal = {Brazilian journal of microbiology : [publication of the Brazilian Society for Microbiology]}, volume = {53}, number = {4}, pages = {2051-2063}, pmid = {36083529}, issn = {1678-4405}, support = {404651/2018-6//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; 303061/2019-7//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; 133550/2019-2//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; Finance Code 001//Coordenação de Aperfeiçoamento de Pessoal de Nível Superior/ ; 001//Brazilian 
Microbiome Project/ ; }, mesh = {*Bacillus subtilis/genetics/metabolism ; Phylogeny ; *Peptides, Cyclic/genetics/metabolism ; Lipopeptides ; Operon ; Bacterial Proteins/metabolism ; }, abstract = {Bacillus subtilis is a versatile bacterial species able to produce surfactin, a lipopeptide biosurfactant. We carried out the phylogenomic characterization and pangenomic analyses using available B. subtilis complete genomes. Also, we report the whole genome of the biosurfactant-producing B. subtilis strain RI4914 that was isolated from effluent water from an oil exploration field. We applied a hybrid sequencing approach using both long- and short-read sequencing technologies to generate a highly accurate, single-chromosome genome. The pangenomics analysis of 153 complete genomes classified as B. subtilis retrieved from the NCBI shows an open pangenome composed of 28,511 accessory genes, which agrees with the high genetic plasticity of the species. Also, this analysis suggests that surfactin production is a common trait shared by members of this species since the srfA operon is highly conserved among the B. subtilis strains found in most of the assemblies available. Finally, increased surfactin production corroborates the higher srfAA gene expression in B. subtilis strain RI4914.}, } @article {pmid36081802, year = {2022}, author = {Zhai, Y and Wei, C}, title = {Open pangenome of Lactococcus lactis generated by a combination of metagenome-assembled genomes and isolate genomes.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {948138}, pmid = {36081802}, issn = {1664-302X}, abstract = {Lactococcus lactis (L. lactis) is a well isolated and cultured lactic acid bacterium, but if utilizing the isolate genomes alone, the genome-based analysis of this taxon would be incomplete, because there are still uncultured strains in some ecological niches. In this study, we recovered 93 high-quality metagenome-assembled genomes (MAGs) of L. 
lactis from food and human gut metagenomes with a culture-independent method. We then constructed a unified genome catalog of L. lactis by integrating these MAGs with 70 publicly available isolated genomes. Having this comprehensive resource, we assessed the genomic diversity and phylogenetic relationships to further explore the genetic and functional properties of L. lactis. An open pangenome of L. lactis was generated using our genome catalog, consisting of 13,066 genes in total, from which 5,448 genes were not identified in the isolate genomes. The core genome-based phylogenetic analysis showed that L. lactis strains we collected were separated into two main subclades corresponding to two subspecies, with some uncultured phylogenetic lineages discovered. The species disparity was also indicated in PCA analysis based on accessory genes of our pangenome. These various analyses shed further light on unexpectedly high diversity within the taxon at both genome and gene levels and gave clues about its population structure and evolution. Lactococcus lactis has a long history of safe use in food fermentations and is considered as one of the important probiotic microorganisms. Obtaining the complete genetic information of L. lactis is important to the food and health industry. However, it can naturally inhabit many environments other than dairy products, including drain water and human gut samples. Here we presented an open pan-genome of L. lactis constructed from 163 high-quality genomes obtained from various environments, including MAGs recovered from environmental metagenomes and isolate genomes. This study expanded the genetic information of L. lactis by about one third, including more than 5,000 novel genes found in uncultured strains. This more complete gene repertoire of L. lactis is crucial to further understanding the genetic and functional properties. 
These properties may be harnessed to impart additional value to dairy fermentation or other industries.}, } @article {pmid36077108, year = {2022}, author = {Lau, NS and Heng, WL and Miswan, N and Azami, NA and Furusawa, G}, title = {Comparative Genomic Analyses of the Genus Photobacterium Illuminate Biosynthetic Gene Clusters Associated with Antagonism.}, journal = {International journal of molecular sciences}, volume = {23}, number = {17}, pages = {}, pmid = {36077108}, issn = {1422-0067}, support = {304.PCCB.6315625//Universiti Sains Malaysia/ ; }, mesh = {Bacterial Typing Techniques ; Base Composition ; DNA, Bacterial/genetics ; *Fatty Acids/analysis ; Genomics ; Multigene Family ; *Photobacterium/genetics ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; }, abstract = {The genus Photobacterium is known for its ecophysiological versatility encompassing free-living, symbiotic, and pathogenic lifestyles. Photobacterium sp. CCB-ST2H9 was isolated from estuarine sediment collected at Matang Mangrove, Malaysia. In this study, the genome of CCB-ST2H9 was sequenced, and the pan-genome of 37 Photobacterium strains was analysed. Phylogeny based on core genes showed that CCB-ST2H9 clustered with P. galatheae, forming a distinct clade with P. halotolerans, P. salinisoli, and P. arenosum. The core genome of Photobacterium was conserved in housekeeping functions, while the flexible genome was well represented by environmental genes related to energy production and carbohydrate metabolism. Genomic metrics including 16S rRNA sequence similarity, average nucleotide identity, and digital DNA-DNA hybridization values were below the cut-off for species delineation, implying that CCB-ST2H9 potentially represents a new species. Genome mining revealed that biosynthetic gene clusters (BGCs) involved in producing antimicrobial compounds such as holomycin in CCB-ST2H9 could contribute to the antagonistic potential. 
Furthermore, the EtOAc extract from the culture broth of CCB-ST2H9 exhibited antagonistic activity against Vibrio spp. Intriguingly, clustering based on BGCs profiles grouped P. galatheae, P. halotolerans, P. salinisoli, P. arenosum, and CCB-ST2H9 together in the heatmap by the presence of a large number of BGCs. These BGCs-rich Photobacterium strains represent great potential for bioactive secondary metabolites production and sources for novel compounds.}, } @article {pmid36076928, year = {2022}, author = {Ravin, NV and Rudenko, TS and Smolyakov, DD and Beletsky, AV and Gureeva, MV and Samylina, OS and Grabovich, MY}, title = {History of the Study of the Genus Thiothrix: From the First Enrichment Cultures to Pangenomic Analysis.}, journal = {International journal of molecular sciences}, volume = {23}, number = {17}, pages = {}, pmid = {36076928}, issn = {1422-0067}, support = {20-14-00137//Russian Science Foundation/ ; }, mesh = {DNA, Bacterial/genetics ; Phylogeny ; RNA, Ribosomal, 16S/genetics/metabolism ; Sulfur/metabolism ; *Thiothrix/genetics/metabolism ; }, abstract = {Representatives of the genus Thiothrix are filamentous, sulfur-oxidizing bacteria found in flowing waters with counter-oriented sulfide and oxygen gradients. They were first described at the end of the 19th century, but the first pure cultures of this species only became available 100 years later. An increase in the number of described Thiothrix species at the beginning of the 21st century shows that the classical phylogenetic marker, 16S rRNA gene, is not informative for species differentiation, which is possible based on genome analysis. Pangenome analysis of the genus Thiothrix showed that the core genome includes genes for dissimilatory sulfur metabolism and central metabolic pathways, namely the Krebs cycle, Embden-Meyerhof-Parnas pathway, glyoxylate cycle, Calvin-Benson-Bassham cycle, and genes for phosphorus metabolism and amination. 
The shell part of the pangenome includes genes for dissimilatory nitrogen metabolism and nitrogen fixation, for respiration with thiosulfate. The dispensable genome comprises genes predicted to encode mainly hypothetical proteins, transporters, transcription regulators, methyltransferases, transposases, and toxin-antitoxin systems.}, } @article {pmid36076376, year = {2022}, author = {Yu, L and Zang, X and Chen, Y and Gao, Y and Pei, Z and Yang, B and Zhang, H and Narbad, A and Tian, F and Zhai, Q and Chen, W}, title = {Phenotype-genotype analysis of Latilactobacills curvatus from different niches: Carbohydrate metabolism, antibiotic resistance, bacteriocin, phage fragments and linkages with CRISPR-Cas systems.}, journal = {Food research international (Ottawa, Ont.)}, volume = {160}, number = {}, pages = {111640}, doi = {10.1016/j.foodres.2022.111640}, pmid = {36076376}, issn = {1873-7145}, support = {BBS/E/F/00044453/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; BBS/OS/NW/000006/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Bacteriocins/genetics ; *Bacteriophages/genetics ; CRISPR-Cas Systems ; Carbohydrate Metabolism/genetics ; Drug Resistance, Microbial ; Genome, Bacterial/genetics ; Genotype ; Phenotype ; }, abstract = {The potential probiotic function of Latilactobacills curvatus has attracted the attention of researchers. To explore the differences in the genomes of L. curvatus, nine strains were isolated from various sources, including feces and fermented vegetables and compared with 25 strains from the NCBI database. The findings indicated that the average genome size, GC content, and CDS of L. curvatus were 1.94 MB, 41.9%, and 1825, respectively. Its core genome is associated with transcription, translation, carbohydrate transport and metabolism, and defense functions. The pan-genome of L. curvatus was in a closed state. The genetic diversity of L. 
curvatus is mainly manifested in its ability to use carbohydrates, antibiotic resistance, bacteriocin operon, and clustered regularly interspaced short palindromic repeats (CRISPR)-Cas for bacterial immunity. The CRISPR system of 34 strains of L. curvatus was predominantly found to be of the IIA type with a few IIC and IE types. These findings will contribute to a better understanding of this species.}, } @article {pmid36073311, year = {2022}, author = {McLean, AR and Torres-Morales, J and Dewhirst, FE and Borisy, GG and Mark Welch, JL}, title = {Site-tropism of streptococci in the oral microbiome.}, journal = {Molecular oral microbiology}, volume = {37}, number = {6}, pages = {229-243}, pmid = {36073311}, issn = {2041-1014}, support = {R01 DE016937/DE/NIDCR NIH HHS/United States ; R01 DE022586/DE/NIDCR NIH HHS/United States ; R01 DE027958/DE/NIDCR NIH HHS/United States ; R01 DE030136/DE/NIDCR NIH HHS/United States ; }, mesh = {Humans ; RNA, Ribosomal, 16S/genetics ; *Streptococcus/genetics ; *Microbiota/genetics ; Metagenome ; Bacteria/genetics ; Mouth/microbiology ; Tropism ; Phylogeny ; }, abstract = {A detailed understanding of where bacteria localize is necessary to advance microbial ecology and microbiome-based therapeutics. The site-specialist hypothesis predicts that most microbes in the human oral cavity have a primary habitat type within the mouth where they are most abundant. We asked whether this hypothesis accurately describes the distribution of the members of the genus Streptococcus, a clinically relevant taxon that dominates most oral sites. Prior analysis of 16S rRNA gene sequencing data indicated that some oral Streptococcus clades are site-specialists while others may be generalists. However, within complex microbial populations composed of numerous closely related species and strains, such as the oral streptococci, genome-scale analysis is necessary to provide the resolution to discriminate closely related taxa with distinct functional roles. 
Here, we assess whether individual species within this genus are specialists using publicly available genomic sequence data that provide species-level resolution. We chose a set of high-quality representative genomes for human oral Streptococcus species. Onto these genomes, we mapped shotgun metagenomic sequencing reads from supragingival plaque, tongue dorsum, and other sites in the oral cavity. We found that every abundant Streptococcus species in the healthy human oral cavity showed strong site-tropism and that even closely related species such as S. mitis, S. oralis, and S. infantis specialized in different sites. These findings indicate that closely related bacteria can have distinct habitat distributions in the absence of dispersal limitation and under similar environmental conditions and immune regimes. Substantial overlap between the core genes of these three species suggests that site-specialization is determined by subtle differences in genomic content.}, } @article {pmid36069574, year = {2022}, author = {Zhong, C and Qu, B and Hu, G and Ning, K}, title = {Pan-Genome Analysis of Campylobacter: Insights on the Genomic Diversity and Virulence Profile.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0102922}, pmid = {36069574}, issn = {2165-0497}, mesh = {Animals ; Humans ; *Campylobacter/genetics ; *Campylobacter Infections ; *Gastroenteritis ; Genome, Bacterial ; Genomics ; Phylogeny ; Virulence/genetics ; Virulence Factors/genetics ; }, abstract = {The genus Campylobacter contains pathogens that cause bacterial gastroenteritis in humans and animals. Despite large-scale sequencing efforts to raise clinical awareness of Campylobacter, little is known about the diversity and functions of virulence factors. Here, we constructed the pan-genome of Campylobacter using 39 representative genomes, elucidating their genetic diversity, evolutionary characteristics, and virulence and resistance profiles. 
The Campylobacter pan-genome was open and showed extensive genome variability, with high levels of gene expansion and contraction as the organism evolved. These Campylobacter members had diverse virulence gene content, and six potential core virulence genes (porA, PEB4, cheY, htrB, Cj1135, and kpsF) have been identified. The conserved mechanisms for Campylobacter pathogenicity were related to adherence, motility, and immune modulation. We emphasized the relative importance of variable virulence genes. Many virulence genes have experienced expansion or contraction in specific lineages, which may be one of the factors causing differences in the content of virulence genes. Additionally, these Campylobacter genomes have a high prevalence of the cmeA and cmeC genes, which are linked to the CmeABC pump and contribute to multidrug resistance. The genomic variations, core and variable virulence factors, and resistance genes of Campylobacter characterized in this study would contribute to a better understanding of the virulence of Campylobacter and more effective use of candidates for drug development and prevention of Campylobacter infections. IMPORTANCE Pathogenic members of the genus Campylobacter are recognized as one of the major causative agents of human bacterial gastroenteritis. This study revealed the pan-genome of 39 Campylobacter species, provided the most updated reconstruction of the global virulence gene pool of 39 Campylobacter species, and identified species-related virulence differences. 
This study highlighted the basic conserved functionality and specificity of pathogenicity that are crucial to infection, which was critical for improving the diagnosis and prevention of Campylobacter infections.}, } @article {pmid36067550, year = {2022}, author = {Hitch, TCA and Bisdorf, K and Afrizal, A and Riedel, T and Overmann, J and Strowig, T and Clavel, T}, title = {A taxonomic note on the genus Prevotella: Description of four novel genera and emended description of the genera Hallella and Xylanibacter.}, journal = {Systematic and applied microbiology}, volume = {45}, number = {6}, pages = {126354}, doi = {10.1016/j.syapm.2022.126354}, pmid = {36067550}, issn = {1618-0984}, mesh = {Humans ; RNA, Ribosomal, 16S/genetics ; Phylogeny ; DNA, Bacterial/genetics ; Sequence Analysis, DNA ; *Ecosystem ; *Prevotella/genetics ; }, abstract = {The genus Prevotella comprises 55 species with validly published, and correct, names (at June 2021) that are phenotypically, ecologically and functionally diverse. This study used a range of comparative genome approaches (marker gene-based genome phylogeny, core genome phylogeny, average amino acid identity, percentage of conserved proteins and clade-specific marker genes) to identify large differences between the 53 species for which genomes are available, as well as two effectively published yet not validly named species and four novel species. These differences were consistent between the various analysis methods and justify the separation of Prevotella into multiple genera. While the distribution across 19 ecosystem types was unique for each species and inconsistent within clades, the functional repertoire based on the presence/absence of both PFAMs and CAZy families revealed distinct clustering based on the proposed genera. 
Based on the integration of all results, we propose the reclassification of species previously assigned to the genus Prevotella into seven genera, including four novel genera for which the names Segatella, Hoylesella, Leyella and Palleniella are proposed. In addition to the reclassification of Prevotella, this work describes four novel species, Hallella faecis, Xylanibacter rodentium, Xylanibacter muris, and Palleniella intestinalis.}, } @article {pmid36061813, year = {2022}, author = {Cai, K and Kuang, L and Yue, W and Xie, S and Xia, X and Zhang, G and Wang, J}, title = {Calmodulin and calmodulin-like gene family in barley: Identification, characterization and expression analyses.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {964888}, pmid = {36061813}, issn = {1664-462X}, abstract = {Calmodulin (CaM) and calmodulin-like (CML) proteins are Ca[2+] relays and play diverse and multiple roles in plant growth, development and stress responses. However, CaM/CML gene family has not been identified in barley (Hordeum vulgare). In the present study, 5 HvCaMs and 80 HvCMLs were identified through a genome-wide analysis. All HvCaM proteins possessed 4 EF-hand motifs, whereas HvCMLs contained 1 to 4 EF-hand motifs. HvCaM2, HvCaM3 and HvCaM5 coded the same polypeptide although they differed in nucleotide sequence, which was identical to the polypeptides coded by OsCaM1-1, OsCaM1-2 and OsCaM1-3. HvCaMs/CMLs were unevenly distributed over barley 7 chromosomes, and could be phylogenetically classified into 8 groups. HvCaMs/CMLs differed in gene structure, cis-acting elements and tissue expression patterns. Segmental and tandem duplication were observed among HvCaMs/CMLs during evolution. HvCML16, HvCML18, HvCML50 and HvCML78 were dispensable genes and the others were core genes in barley pan-genome. 
In addition, 14 HvCaM/CML genes were selected to examine their responses to salt, osmotic and low potassium stresses by qRT-PCR, and their expression was stress- and time-dependent. These results facilitate our understanding and further functional identification of HvCaMs/CMLs.}, } @article {pmid36058542, year = {2022}, author = {Ribeiro, M and Sousa, M and Borges, V and Gomes, JP and Duarte, S and Isidro, J and Vieira, L and Torres, C and Santos, H and Capelo, JL and Poeta, P and Igrejas, G}, title = {Bioinformatics study of expression from genomes of epidemiologically related MRSA CC398 isolates from human and wild animal samples.}, journal = {Journal of proteomics}, volume = {268}, number = {}, pages = {104714}, doi = {10.1016/j.jprot.2022.104714}, pmid = {36058542}, issn = {1876-7737}, mesh = {Aminoglycosides ; Animals ; Animals, Wild/microbiology ; Anti-Bacterial Agents/pharmacology ; Clindamycin ; Computational Biology ; Humans ; Immunoglobulins ; Livestock ; Macrolides ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Proteome ; *Staphylococcal Infections/epidemiology/veterinary ; Staphylococcus aureus/genetics ; *Transcriptome ; }, abstract = {One of the most important livestock-associated methicillin-resistant Staphylococcus aureus (LA-MRSA) genetic lineages is the clonal complex (CC) 398, which can cause typical S. aureus-associated infections in people. In this work, whole-genome sequencing, RNA-sequencing, and gel-based comparative proteomics were applied to study the genetic characteristics of three MRSA CC398 isolates recovered from humans (strains C5621 and C9017), and from an animal (strain OR418). Of the three strains, C9017 presented the broadest resistance genotype, including resistance to fluoroquinolone, clindamycin, tiamulin, macrolide and aminoglycoside antimicrobial classes. The scn, sak, and chp genes of the immune evasion cluster system were solely detected in OR418. 
Pangenome analysis showed a total of 288 strain-specific genes, most of which are hypothetical or phage-related proteins. OR418 had the most pronounced genetic differences. RNAIII (δ-hemolysin) gene was clearly the most expressed gene in OR418 and C5621, but it was not detected in C9017. Significant differences in the proteome profiles were found between strains. For example, the immunoglobulin-binding protein Sbi was more abundant in OR418. Considering that Sbi is a multifunctional immune evasion factor in S. aureus, the results point to OR418 strain having high zoonotic potential. Overall, multiomics biomarker signatures can assume an important role to advance precision medicine in the years to come. SIGNIFICANCE: MRSA is one of the most representative drug-resistant pathogens and its dissemination is increasing due to MRSA capability of establishing new reservoirs. LA-MRSA is considered an emerging problem worldwide and CC398 is one of the most important genetic lineages. In this study, three MRSA CC398 isolates recovered from humans and from a wild animal were analyzed through whole-genome sequencing, RNA-sequencing, and gel-based comparative proteomics in order to gather systems-wide omics data and better understand the genetic characteristics of this lineage to identify distinctive markers and genomic features of relevance to public health.}, } @article {pmid36053980, year = {2022}, author = {Goff, JL and Szink, EG and Thorgersen, MP and Putt, AD and Fan, Y and Lui, LM and Nielsen, TN and Hunt, KA and Michael, JP and Wang, Y and Ning, D and Fu, Y and Van Nostrand, JD and Poole, FL and Chandonia, JM and Hazen, TC and Stahl, DA and Zhou, J and Arkin, AP and Adams, MWW}, title = {Ecophysiological and genomic analyses of a representative isolate of highly abundant Bacillus cereus strains in contaminated subsurface sediments.}, journal = {Environmental microbiology}, volume = {24}, number = {11}, pages = {5546-5560}, pmid = {36053980}, issn = {1462-2920}, mesh = 
{RNA, Ribosomal, 16S/genetics ; *Bacillus cereus/genetics ; *Metals, Heavy ; Genomics ; Phylogeny ; }, abstract = {Bacillus cereus strain CPT56D-587-MTF (CPTF) was isolated from the highly contaminated Oak Ridge Reservation (ORR) subsurface. This site is contaminated with high levels of nitric acid and multiple heavy metals. Amplicon sequencing of the 16S rRNA genes (V4 region) in sediment from this area revealed an amplicon sequence variant (ASV) with 100% identity to the CPTF 16S rRNA sequence. Notably, this CPTF-matching ASV had the highest relative abundance in this community survey, with a median relative abundance of 3.77% and comprised 20%-40% of reads in some samples. Pangenomic analysis revealed that strain CPTF has expanded genomic content compared to other B. cereus species-largely due to plasmid acquisition and expansion of transposable elements. This suggests that these features are important for rapid adaptation to native environmental stressors. We connected genotype to phenotype in the context of the unique geochemistry of the site. These analyses revealed that certain genes (e.g. nitrate reductase, heavy metal efflux pumps) that allow this strain to successfully occupy the geochemically heterogenous microniches of its native site are characteristic of the B. 
cereus species while others such as acid tolerance are mobile genetic element associated and are generally unique to strain CPTF.}, } @article {pmid36051757, year = {2022}, author = {Dai, Z and Wu, T and Xu, S and Zhou, L and Tang, W and Hu, E and Zhan, L and Chen, M and Yu, G}, title = {Characterization of toxin-antitoxin systems from public sequencing data: A case study in Pseudomonas aeruginosa.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {951774}, pmid = {36051757}, issn = {1664-302X}, abstract = {The toxin-antitoxin (TA) system is a widely distributed group of genetic modules that play important roles in the life of prokaryotes, with mobile genetic elements (MGEs) contributing to the dissemination of antibiotic resistance gene (ARG). The diversity and richness of TA systems in Pseudomonas aeruginosa, as one of the bacterial species with ARGs, have not yet been completely demonstrated. In this study, we explored the TA systems from the public genomic sequencing data and genome sequences. A small scale of genomic sequencing data in 281 isolates was selected from the NCBI SRA database, reassembling the genomes of these isolates led to the findings of abundant TA homologs. Furthermore, remapping these identified TA modules on 5,437 genome/draft genomes uncovers a great diversity of TA modules in P. aeruginosa. Moreover, manual inspection revealed several TA systems that were not yet reported in P. aeruginosa including the hok-sok, cptA-cptB, cbeA-cbtA, tomB-hha, and ryeA-sdsR. Additional annotation revealed that a large number of MGEs were closely distributed with TA. Also, 16% of ARGs are located relatively close to TA. Our work confirmed a wealth of TA genes in the unexplored P. aeruginosa pan-genomes, expanded the knowledge on P. aeruginosa, and provided methodological tips on large-scale data mining for future studies. 
The co-occurrence of MGE, ARG, and TA may indicate a potential interaction in their dissemination.}, } @article {pmid36042298, year = {2022}, author = {}, title = {One pangenome to bind them all.}, journal = {Nature biotechnology}, volume = {40}, number = {9}, pages = {1301}, doi = {10.1038/s41587-022-01484-y}, pmid = {36042298}, issn = {1546-1696}, mesh = {*Genome, Bacterial ; *Genomics ; }, } @article {pmid36034705, year = {2022}, author = {Tian, C and Xing, M and Fu, L and Zhao, Y and Fan, X and Wang, S}, title = {Emergence of uncommon KL38-OCL6-ST220 carbapenem-resistant Acinetobacter pittii strain, co-producing chromosomal NDM-1 and OXA-820 carbapenemases.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {943735}, pmid = {36034705}, issn = {2235-2988}, mesh = {Acinetobacter ; *Acinetobacter Infections ; Bacterial Proteins ; DNA Transposable Elements ; Humans ; Meropenem ; Microbial Sensitivity Tests ; Multilocus Sequence Typing ; Phylogeny ; Virulence Factors ; beta-Lactamases ; }, abstract = {OBJECTIVE: To characterize one KL38-OCL6-ST220 carbapenem-resistant Acinetobacter pittii strain, co-producing chromosomal NDM-1 and OXA-820 carbapenemases.

METHODS: A. pittii TCM strain was isolated from a bloodstream infection (BSI). Antimicrobial susceptibility tests were conducted via disc diffusion and broth microdilution. Stability experiments of bla NDM-1 and bla OXA-820 carbapenemase genes were further performed. Whole-genome sequencing (WGS) was performed on the Illumina and Oxford Nanopore platforms. Multilocus sequence typing (MLST) was analyzed based on the Pasteur and Oxford schemes. Resistance genes, virulence factors, and insertion sequences (ISs) were identified with ABRicate based on ResFinder 4.0, virulence factor database (VFDB), and ISfinder. Capsular polysaccharide (KL), lipooligosaccharide outer core (OCL), and plasmid reconstruction were tested using Kaptive and PLACNETw. PHASTER was used to predict prophage regions. A comparative genomics analysis of all ST220 A. pittii strains from the public database was carried out. Point mutations, average nucleotide identity (ANI), DNA-DNA hybridization (DDH) distances, and pan-genome analysis were performed.

RESULTS: A. pittii TCM was ST220[Pas] and ST1818[Oxf] with KL38 and OCL6, respectively. It was resistant to imipenem, meropenem, and ciprofloxacin but still susceptible to amikacin, colistin, and tigecycline. WGS revealed that A. pittii TCM contained one circular chromosome and four plasmids. The Tn125 composite transposon, including bla NDM-1, was located in the chromosome with 3-bp target site duplications (TSDs). Many virulence factors and the bla OXA-820 carbapenemase gene were also identified. The stability assays revealed that bla NDM-1 and bla OXA-820 were stabilized by passage in an antibiotic-free medium. Moreover, 12 prophage regions were identified in the chromosome. Phylogenetic analysis showed that there are 11 ST220 A. pittii strains, and one collected from Anhui, China was closely related. All ST220 A. pittii strains presented high ANI and DDH values; they ranged from 99.85% to 100% for ANI and from 97.4% to 99.9% for DDH. Pan-genome analysis revealed 3,200 core genes, 0 soft core genes, 1,571 shell genes, and 933 cloud genes among the 11 ST220 A. pittii strains.

CONCLUSIONS: The coexistence of chromosomal NDM-1 and OXA-820 carbapenemases in A. pittii presents a huge challenge in healthcare settings. Increased surveillance of this species in hospital and community settings is urgently needed.}, } @article {pmid36029458, year = {2022}, author = {Hwang, CY and Cho, ES and Rhee, WJ and Kim, E and Seo, MJ}, title = {Genomic and physiological analysis of C50 carotenoid-producing novel Halorubrum ruber sp. nov.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {60}, number = {10}, pages = {1007-1020}, pmid = {36029458}, issn = {1976-3794}, mesh = {Amino Acids/genetics ; Antioxidants/analysis ; Bacterial Typing Techniques ; Carotenoids ; DNA, Archaeal/genetics ; DNA, Bacterial ; Fatty Acids/analysis ; Free Radicals ; Genomics ; *Halorubrum/genetics ; Mevalonic Acid ; Nucleic Acid Hybridization ; Nucleotides ; Phospholipids/analysis ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; Sodium Chloride/metabolism ; Water ; }, abstract = {A novel haloarchaeal species designated as MBLA0099[T] was isolated from seawater near Yeongheung Island. Cells were Gram-negative, non-motile, red-pigmented, and rod-shaped. They grew at 10-45°C, within pH 5.5-9.0, and between 7.5% and 30% NaCl concentrations. Cells were able to grow without Mg[2+] and were lysed in distilled water. The size of the whole-genome and G + C content of DNA was 3.02 Mb and 68.9 mol%, respectively. Phylogenetic analysis shows that the strain MBLA0099[T] belongs to the genus Halorubrum. The average nucleotide and amino acid identity, and in silico DNA-DNA hybridization values were below the species delineation threshold. Pan-genomic analysis revealed that 3.2% of all genes present in strain MBLA0099[T] were unique to the strain. The red carotenoid produced by strain MBLA0099[T] was subjected to spectrometric and chromatographic analyses and confirmed to be bacterioruberin as C50 carotenoid. 
Mevalonic acid, terpenoid backbone, and carotenoid biosynthesis pathway were annotated for strain MBLA0099[T]. The C50 carotenoid production by strain MBLA0099[T] was also enhanced under various stress conditions including relatively neutral pH, high oxidative and salinity conditions. Additionally, the strain MBLA0099[T]-derived bacterioruberin showed the antioxidant activity with EC50 value of 12.29 µg/ml, based on the evaluation of DPPH free radical scavenging activity. The present study would be the first report on the identification of C50 carotenoid from the strain MBLA0099[T] representing a novel species of the genus Halorubrum, for which the name Halorubrum ruber sp. nov. is proposed. The type strain used was MBLA0099[T] (= KCTC 4296[T] = JCM 34701[T]).}, } @article {pmid36016080, year = {2022}, author = {Yousaf, M and Ullah, A and Sarosh, N and Abbasi, SW and Ismail, S and Bibi, S and Hasan, MM and Albadrani, GM and Talaat Nouh, NA and Abdulhakim, JA and Abdel-Daim, MM and Bin Emran, T}, title = {Design of Multi-Epitope Vaccine for Staphylococcus saprophyticus: Pan-Genome and Reverse Vaccinology Approach.}, journal = {Vaccines}, volume = {10}, number = {8}, pages = {}, pmid = {36016080}, issn = {2076-393X}, support = {PNURSP2022R30//This research was supported by Princess Nourah bint Abdulrahman University Researchers Supporting Project number (PNURSP2022R30), Princess Nourah bint Abdulrahman University, Riyadh, Saudi Arabia./ ; }, abstract = {Staphylococcus saprophyticus is a Gram-positive coccus responsible for the occurrence of cystitis in sexually active, young females. While effective antibiotics against this organism exist, resistant strains are on the rise. Therefore, prevention via vaccines appears to be a viable solution to address this problem. In comparison to traditional techniques of vaccine design, computationally aided vaccine development demonstrates marked specificity, efficiency, stability, and safety. 
In the present study, a novel, multi-epitope vaccine construct was developed against S. saprophyticus by targeting fully sequenced proteomes of its five different strains, which were examined using a pangenome and subtractive proteomic strategy to characterize prospective vaccination targets. The three immunogenic vaccine targets which were utilized to map the probable immune epitopes were verified by annotating the entire proteome. The predicted epitopes were further screened on the basis of antigenicity, allergenicity, water solubility, toxicity, virulence, and binding affinity towards the DRB*0101 allele, resulting in 11 potential epitopes, i.e., DLKKQKEKL, NKDLKKQKE, QDKLKDKSD, NVMDNKDLE, TSGTPDSQA, NANSDGSSS, GSDSSSSNN, DSSSSNNDS, DSSSSDRNN, SSSDRNNGD, and SSDDKSKDS. All these epitopes have the efficacy to cover 99.74% of populations globally. Finally, shortlisted epitopes were joined together with linkers and three different adjuvants to find the most stable and immunogenic vaccine construct. The top-ranked vaccine construct was further scrutinized on the basis of its physicochemical characterization and immunological profile. The non-allergenic and antigenic features of modeled vaccine constructs were initially validated and then subjected to docking with immune receptor major histocompatibility complex I and II (MHC-I and II), resulting in strong contact. In silico cloning validations yielded a codon adaptation index (CAI) value of 1 and an ideal percentage of GC contents (46.717%), indicating a putative expression of the vaccine in E. coli. Furthermore, immune simulation demonstrated that, after injecting the proposed MEVC, powerful antibodies were produced, resulting in the sharpest peaks of IgM + IgG formation (>11,500) within 5 to 15 days. Experimental testing against S. 
saprophyticus can evaluate the safety and efficacy of these prophylactic vaccination designs.}, } @article {pmid36014959, year = {2022}, author = {Jing, L and Xu, Z and Zhang, Y and Li, D and Song, Y and Hu, H and Fang, Y and Zhu, W}, title = {Metagenomic Insights into Pathogenic Characterization of ST410 Acinetobacter nosocomialis Prevalent in China.}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {8}, pages = {}, pmid = {36014959}, issn = {2076-0817}, support = {TYU-039F//Beijing Medical and Health Foundation/ ; }, abstract = {Acinetobacter nosocomialis is a prevalent opportunistic pathogen that causes hospital-acquired infections. The increasing threats from A. nosocomialis infections have led to attention from the scientific and medical communities. Metagenomic next-generation sequencing (mNGS) was performed for an exudate specimen collected from an ICU patient with wound infection, followed by sepsis, in Tongji Hospital. Three assembly strategies were employed to recover the genome of A. nosocomialis in the metagenomic sample. Together with publicly available genomes of A. nosocomialis, the features of population genetics and molecular epidemiology were deeply analyzed. A draft genome was reconstructed for the metagenomic strain WHM01, derived from the ST410 A. nosocomialis dominating the microbial community, thereby prompting its highly pathogenic risk, which is associated with infection and persistence. The structure of the bacterial pangenome was characterized, including the 1862 core and 11,815 accessory genes present in the 157 strains. The genetic diversity of the genes coding for the 128 virulence factors assigned to 14 functional categories was uncovered in this nosocomial pathogen, such as the lipooligosaccharide, capsule, type IV pilus, and outer membrane proteins. Our work revealed genomic properties of ST410 A. 
nosocomialis, which is prevalent in China, and further highlighted that metagenomic surveillance may be a prospective application for evaluating the pathogenic characteristics of the nosocomial opportunistic pathogens.}, } @article {pmid36013379, year = {2022}, author = {Zoclanclounon, YAB and Rostás, M and Chung, NJ and Mo, Y and Karlovsky, P and Dossa, K}, title = {Characterization of Peroxidase and Laccase Gene Families and In Silico Identification of Potential Genes Involved in Upstream Steps of Lignan Formation in Sesame.}, journal = {Life (Basel, Switzerland)}, volume = {12}, number = {8}, pages = {}, pmid = {36013379}, issn = {2075-1729}, support = {Ref 3.4 - 1202788 - 417 SEN - GF-P//Alexander von Humboldt Foundation/ ; }, abstract = {Peroxidases and laccases are oxidative enzymes involved in physiological processes in plants, covering responses to biotic and abiotic stress as well as biosynthesis of health-promoting specialized metabolites. Although they are thought to be involved in the biosynthesis of (+)-pinoresinol, a comprehensive investigation of this class of enzymes has not yet been conducted in the emerging oil crop sesame and no information is available regarding the potential (+)-pinoresinol synthase genes in this crop. In the present study, we conducted a pan-genome-wide identification of peroxidase and laccase genes coupled with transcriptome profiling of diverse sesame varieties. A total of 83 and 48 genes have been identified as coding for sesame peroxidase and laccase genes, respectively. Based on their protein domain and Arabidopsis thaliana genes used as baits, the genes were classified into nine and seven groups of peroxidase and laccase genes, respectively. The expression of the genes was evaluated using dynamic transcriptome sequencing data from six sesame varieties, including one elite cultivar, white vs black seed varieties, and high vs low oil content varieties. 
Two peroxidase genes (SiPOD52 and SiPOD63) and two laccase genes (SiLAC1 and SiLAC39), well conserved within the sesame pan-genome and exhibiting consistent expression patterns within sesame varieties matching the kinetic of (+)-pinoresinol accumulation in seeds, were identified as potential (+)-pinoresinol synthase genes. Cis-acting elements of the candidate genes revealed their potential involvement in development, hormonal signaling, and response to light and other abiotic triggers. Transcription factor enrichment analysis of promoter regions showed the predominance of MYB binding sequences. The findings from this study pave the way for lignans-oriented engineering of sesame with wide potential applications in food, health and medicinal domains.}, } @article {pmid36012871, year = {2022}, author = {Ogaji, YO and Lee, RC and Sawbridge, TI and Cocks, BG and Daetwyler, HD and Kaur, S}, title = {De Novo Long-Read Whole-Genome Assemblies and the Comparative Pan-Genome Analysis of Ascochyta Blight Pathogens Affecting Field Pea.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {8}, pages = {}, pmid = {36012871}, issn = {2309-608X}, abstract = {Ascochyta Blight (AB) is a major disease of many cool-season legumes globally. In field pea, three fungal pathogens have been identified to be responsible for this disease in Australia, namely Peyronellaea pinodes, Peyronellaea pinodella and Phoma koolunga. Limited genomic resources for these pathogens have been generated, which has hampered the implementation of effective management strategies and breeding for resistant cultivars. Using Oxford Nanopore long-read sequencing, we report the first high-quality, fully annotated, near-chromosome-level nuclear and mitochondrial genome assemblies for 18 isolates from the Australian AB complex. 
Comparative genome analysis was performed to elucidate the differences and similarities between species and isolates using phylogenetic relationships and functional diversity. Our data indicated that P. pinodella and P. koolunga are heterothallic, while P. pinodes is homothallic. More homology and orthologous gene clusters are shared between P. pinodes and P. pinodella compared to P. koolunga. The analysis of the repetitive DNA content showed differences in the transposable repeat composition in the genomes and their expression in the transcriptomes. Significant repeat expansion in P. koolunga's genome was seen, with strong repeat-induced point mutation (RIP) activity being evident. Phylogenetic analysis revealed that genetic diversity can be exploited for species marker development. This study provided the much-needed genetic resources and characterization of the AB species to further drive research in key areas such as disease epidemiology and host-pathogen interactions.}, } @article {pmid36011264, year = {2022}, author = {Woldegiorgis, ST and Wu, T and Gao, L and Huang, Y and Zheng, Y and Qiu, F and Xu, S and Tao, H and Harrison, A and Liu, W and He, H}, title = {Identification of Heat-Tolerant Genes in Non-Reference Sequences in Rice by Integrating Pan-Genome, Transcriptomics, and QTLs.}, journal = {Genes}, volume = {13}, number = {8}, pages = {}, pmid = {36011264}, issn = {2073-4425}, mesh = {Genes, Plant ; *Oryza/genetics ; Quantitative Trait Loci/genetics ; *Thermotolerance/genetics ; Transcriptome ; }, abstract = {The availability of large-scale genomic data resources makes it very convenient to mine and analyze genes that are related to important agricultural traits in rice. Pan-genomes have been constructed to provide insight into the genome diversity and functionality of different plants, which can be used in genome-assisted crop improvement. 
Thus, a pan-genome comprising all genetic elements is crucial for comprehensive variation study among the heat-resistant and -susceptible rice varieties. In this study, a rice pan-genome was firstly constructed by using 45 heat-tolerant and 15 heat-sensitive rice varieties. A total of 38,998 pan-genome genes were identified, including 37,859 genes in the reference and 1141 in the non-reference contigs. Genomic variation analysis demonstrated that a total of 76,435 SNPs were detected and identified as the heat-tolerance-related SNPs, which were specifically present in the highly heat-resistant rice cultivars and located in the genic regions or within 2 kbp upstream and downstream of the genes. Meanwhile, 3214 upregulated and 2212 downregulated genes with heat stress tolerance-related SNPs were detected in one or multiple RNA-seq datasets of rice under heat stress, among which 24 were located in the non-reference contigs of the rice pan-genome. We then mapped the DEGs with heat stress tolerance-related SNPs to the heat stress-resistant QTL regions. A total of 1677 DEGs, including 990 upregulated and 687 downregulated genes, were mapped to the 46 heat stress-resistant QTL regions, in which 2 upregulated genes with heat stress tolerance-related SNPs were identified in the non-reference sequences. This pan-genome resource is an important step towards the effective and efficient genetic improvement of heat stress resistance in rice to help meet the rapidly growing needs for improved rice productivity under different environmental stresses. 
These findings provide further insight into the functional validation of a number of non-reference genes and, especially, the two genes identified in the heat stress-resistant QTLs in rice.}, } @article {pmid36010855, year = {2022}, author = {Yamamoto, M and Takahashi, Y}, title = {Genetic and Epigenetic Pathogenesis of Acromegaly.}, journal = {Cancers}, volume = {14}, number = {16}, pages = {}, pmid = {36010855}, issn = {2072-6694}, abstract = {Acromegaly is caused by excessive secretion of GH and IGF-I mostly from somatotroph tumors. Various genetic and epigenetic factors are involved in the pathogenesis of somatotroph tumors. While somatic mutations of GNAS are the most prevalent cause of somatotroph tumors, germline mutations in various genes (AIP, PRKAR1A, GPR101, GNAS, MEN1, CDKN1B, SDHx, MAX) are also known as the cause of somatotroph tumors. Moreover, recent findings based on multiple perspectives of the pangenomic approach including genome, transcriptome, and methylome analyses, histological characterization, genomic instability, and possible involvement of miRNAs have gradually unveiled the whole landscape of the underlying mechanisms of somatotroph tumors. 
In this review, we will focus on the recent advances in genetic and epigenetic pathogenesis of somatotroph tumors.}, } @article {pmid36008774, year = {2022}, author = {Rodriguez Jimenez, A and Guiglielmoni, N and Goetghebuer, L and Dechamps, E and George, IF and Flot, JF}, title = {Comparative genome analysis of Vagococcus fluvialis reveals abundance of mobile genetic elements in sponge-isolated strains.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {618}, pmid = {36008774}, issn = {1471-2164}, support = {DISARM//Fonds De La Recherche Scientifique - FNRS/ ; DISARM//Fonds De La Recherche Scientifique - FNRS/ ; DISARM//Fonds De La Recherche Scientifique - FNRS/ ; }, mesh = {Animals ; Enterococcaceae/genetics ; Interspersed Repetitive Sequences/genetics ; Phylogeny ; *Porifera/genetics ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: Vagococcus fluvialis is a species of lactic acid bacteria found both free-living in river and seawater and associated to hosts, such as marine sponges. This species has been greatly understudied, with no complete genome assembly available to date, which is essential for the characterisation of the mobilome.

RESULTS: We sequenced and assembled de novo the complete genome sequences of five V. fluvialis isolates recovered from marine sponges. Pangenome analysis of the V. fluvialis species (total of 17 genomes) showed a high intraspecific diversity, with 45.5% of orthologous genes found to be strain specific. Despite this diversity, analyses of gene functions clustered all V. fluvialis species together and separated them from other sequenced Vagococcus species. V. fluvialis strains from different habitats were highly similar in terms of functional diversity but the sponge-isolated strains were enriched in several functions related to the marine environment. Furthermore, sponge-isolated strains carried a significantly higher number of mobile genetic elements (MGEs) compared to previously sequenced V. fluvialis strains from other environments. Sponge-isolated strains carried up to 4 circular plasmids each, including a 48-kb conjugative plasmid. Three of the five strains carried an additional circular extrachromosomal sequence, assumed to be an excised prophage as it contained mainly viral genes and lacked plasmid replication genes. Insertion sequences (ISs) were up to five times more abundant in the genomes of sponge-isolated strains compared to the others, including several IS families found exclusively in these genomes.

CONCLUSIONS: Our findings highlight the dynamics and plasticity of the V. fluvialis genome. The abundance of mobile genetic elements in the genomes of sponge-isolated V. fluvialis strains suggests that the mobilome might be key to understanding the genomic signatures of symbiosis in bacteria.}, } @article {pmid36005754, year = {2022}, author = {Ashrafi, S and Kuzmanović, N and Patz, S and Lohwasser, U and Bunk, B and Spröer, C and Lorenz, M and Elhady, A and Frühling, A and Neumann-Schaal, M and Verbarg, S and Becker, M and Thünen, T}, title = {Two New Rhizobiales Species Isolated from Root Nodules of Common Sainfoin (Onobrychis viciifolia) Show Different Plant Colonization Strategies.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0109922}, pmid = {36005754}, issn = {2165-0497}, mesh = {Fertilizers ; Carbon Dioxide ; *Mesorhizobium/genetics ; *Fabaceae/microbiology ; *Rhizobium/genetics ; Symbiosis ; Nitrogen ; }, abstract = {Root nodules of legume plants are primarily inhabited by rhizobial nitrogen-fixing bacteria. Here, we propose two new Rhizobiales species isolated from root nodules of common sainfoin (Onobrychis viciifolia), as shown by core-gene phylogeny, overall genome relatedness indices, and pan-genome analysis. Mesorhizobium onobrychidis sp. nov. actively induces nodules and achieves atmospheric nitrogen and carbon dioxide fixation. This species appears to be depleted in motility genes and is enriched in genes for direct effects on plant growth performance. Its genome reveals functional and plant growth-promoting signatures, like a large unique chromosomal genomic island with high density of symbiotic genetic traits. Onobrychidicola muellerharveyae gen. nov. sp. nov. is described as a type species of the new genus Onobrychidicola in Rhizobiaceae. This species comprises unique genetic features and plant growth-promoting traits (PGPTs), which strongly indicate its function in biotic stress reduction and motility. 
We applied a newly developed bioinformatics approach for in silico prediction of PGPTs (PGPT-Pred), which supports the different lifestyles of the two new species and the plant growth-promoting performance of M. onobrychidis in the greenhouse trial. IMPORTANCE The intensive use of chemical fertilizers has a variety of negative effects on the environment. Increased utilization of biological nitrogen fixation (BNF) is one way to mitigate those negative impacts. In order to optimize BNF, suitable candidates for different legume species are required. Despite intensive search for new rhizobial bacteria associated with legumes, no new rhizobia have recently been identified from sainfoin (Onobrychis viciifolia). Here, we report on the discovery of two new rhizobial species associated with sainfoin, which are of high importance for the host and may help to increase sustainability in agricultural practices. We employed the combination of in silico prediction and in planta experiments, which is an effective way to detect promising plant growth-promoting bacteria.}, } @article {pmid36004795, year = {2022}, author = {Liu, H and Wang, X and Liu, S and Huang, Y and Guo, YX and Xie, WZ and Liu, H and Tahir Ul Qamar, M and Xu, Q and Chen, LL}, title = {Citrus Pan-Genome to Breeding Database (CPBD): A comprehensive genome database for citrus breeding.}, journal = {Molecular plant}, volume = {15}, number = {10}, pages = {1503-1505}, doi = {10.1016/j.molp.2022.08.006}, pmid = {36004795}, issn = {1752-9867}, mesh = {*Citrus/genetics ; Genome, Plant/genetics ; Plant Breeding ; }, } @article {pmid36003217, year = {2022}, author = {Mattock, J and Smith, AM and Keddy, KH and Manners, EJ and Duze, ST and Smouse, S and Tau, N and Baker, D and Chattaway, MA and Mather, AE and Wain, J and Langridge, GC}, title = {Genetic characterization of Salmonella Infantis from South Africa, 2004-2016.}, journal = {Access microbiology}, volume = {4}, number = {7}, pages = {acmi000371}, pmid = {36003217}, 
issn = {2516-8290}, support = {MC_PC_16093/MRC_/Medical Research Council/United Kingdom ; }, abstract = {Salmonella Infantis is presenting an increasing risk to public health. Of particular concern are the reports of pESI, a multidrug resistance (MDR) encoding megaplasmid, in isolates from multiple countries, but little is known about its presence or diversity in South Africa. Whole genome sequences of 387 S. Infantis isolates from South Africa (2004-2020) were analysed for genetic phylogeny, recombination frequency, antimicrobial resistance (AMR) determinants, plasmid presence and overall gene content. The population structure of South African S. Infantis was substantially different to S. Infantis reported elsewhere; only two thirds of isolates belonged to eBG31, while the remainder were identified as eBG297, a much rarer group globally. Significantly higher levels of recombination were observed in the eBG297 isolates, which was associated with the presence of prophages. The majority of isolates were putatively susceptible to antimicrobials (335/387) and lacked any plasmids (311/387); the megaplasmid pESI was present in just one isolate. A larger proportion of eBG31 isolates, 19% (49/263), contained at least one AMR determinant, compared to eBG297 at 2% (3/124). Comparison of the pan-genomes of isolates from either eBG identified 943 genes significantly associated with eBG, with 43 found exclusively in eBG31 isolates and 34 in eBG297 isolates. This, along with the single nucleotide polymorphism distance and difference in resistance profiles, suggests that eBG31 and eBG297 isolates occupy different niches within South Africa. If antibiotic-resistant S. 
Infantis emerges in South Africa, probably through the spread of the pESI plasmid, treatment of this infection would be compromised.}, } @article {pmid36000891, year = {2022}, author = {Holm, MKA and Jørgensen, KM and Bagge, K and Worning, P and Pedersen, M and Westh, H and Monk, JM and Bartels, MD}, title = {Estimated Roles of the Carrier and the Bacterial Strain When Methicillin-Resistant Staphylococcus aureus Decolonization Fails: a Case-Control Study.}, journal = {Microbiology spectrum}, volume = {10}, number = {5}, pages = {e0129622}, pmid = {36000891}, issn = {2165-0497}, mesh = {Humans ; Adolescent ; *Methicillin-Resistant Staphylococcus aureus/genetics ; *Staphylococcal Infections/drug therapy/epidemiology/microbiology ; Case-Control Studies ; Quality of Life ; Anti-Bacterial Agents/therapeutic use ; Carrier State/epidemiology ; }, abstract = {Methicillin-resistant Staphylococcus aureus (MRSA) is a common bacterial pathogen that frequently colonizes healthy individuals, with potential to cause invasive infection. In Denmark, to keep the prevalence low, MRSA carriers are recommended to undergo decolonization treatments, but achieving decolonization is challenging. Knowledge about the factors contributing to decolonization is scarce. We aimed to identify bacterial genome and clinical factors influencing MRSA decolonization. We identified all new MRSA patients above 2 years of age within the Hvidovre catchment area, Copenhagen, Denmark, in 2017 and 2018. Carriers were defined as chronic carriers (cases) if they were MRSA positive after two or more treatments and as nonchronic carriers (controls) if they were MRSA free after the first or second treatment. Using whole-genome sequencing (WGS), we constructed a pangenome of bacterial strains. With the incorporation of bacterial genome and clinical patient data, machine learning and multivariate analyses were performed to determine the factors associated with decolonization. 
A total of 477 MRSA carriers were included. An age of ≥13 years was significantly associated with nonchronic carriage. We identified 278 bacterial genetic features that were statistically significantly associated with chronic carriage (P < 0.05 by Fisher's exact test). Chronic MRSA carriage was predicted with 68% accuracy using a combination of bacterial genome data and patient clinical data. Decolonization success is multifactorial. Apart from the 68% predicted accuracy found in this study, we estimate that the remaining 32% is a result of host factors and microbiome composition. IMPORTANCE Carriage of methicillin-resistant Staphylococcus aureus (MRSA) and other multiresistant bacteria is a prerequisite for infection and transmission. Successful decolonization treatment removes these risks. We aimed to identify bacterial genome and host clinical factors that influence MRSA decolonization to estimate the roles of the carrier and the bacterial strain, respectively, when decolonization fails. The long-term goal, beyond this study, is to optimize decolonization success, minimize MRSA transmission, and, ultimately, improve the quality of life of MRSA carriers.}, } @article {pmid35999561, year = {2022}, author = {Gui, S and Wei, W and Jiang, C and Luo, J and Chen, L and Wu, S and Li, W and Wang, Y and Li, S and Yang, N and Li, Q and Fernie, AR and Yan, J}, title = {A pan-Zea genome map for enhancing maize improvement.}, journal = {Genome biology}, volume = {23}, number = {1}, pages = {178}, pmid = {35999561}, issn = {1474-760X}, mesh = {Chromosome Mapping/methods ; Domestication ; *Genome, Plant ; Humans ; Plant Breeding/methods ; *Zea mays/genetics ; }, abstract = {BACKGROUND: Maize (Zea mays L.) is at the vanguard facing the upcoming breeding challenges. However, both a super pan-genome for the Zea genus and a comprehensive genetic variation map for maize breeding are still lacking.

RESULTS: Here, we construct an approximately 6.71-Gb pan-Zea genome that contains around 4.57-Gb non-B73 reference sequences from fragmented de novo assemblies of 721 pan-Zea individuals. We annotate a total of 58,944 pan-Zea genes and find around 44.34% of them are dispensable in the pan-Zea population. Moreover, 255,821 common structural variations are identified and genotyped in a maize association mapping panel. Further analyses reveal gene presence/absence variants and their potential roles during domestication of maize. Combining genetic analyses with multi-omics data, we demonstrate how structural variants are associated with complex agronomic traits.

CONCLUSIONS: Our results highlight the underexplored role of the pan-Zea genome and structural variations to further understand domestication of maize and explore their potential utilization in crop improvement.}, } @article {pmid35993719, year = {2022}, author = {Baker, JL}, title = {Using Nanopore Sequencing to Obtain Complete Bacterial Genomes from Saliva Samples.}, journal = {mSystems}, volume = {7}, number = {5}, pages = {e0049122}, pmid = {35993719}, issn = {2379-5077}, support = {K99 DE029228/DE/NIDCR NIH HHS/United States ; K99-DE029228//HHS | NIH | National Institute of Dental and Craniofacial Research (NIDCR)/ ; }, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Nanopore Sequencing/methods ; Saliva ; Genome, Bacterial/genetics ; *Microbiota/genetics ; Bacteria/genetics ; }, abstract = {Obtaining complete, high-quality reference genomes is essential to the study of any organism. Recent advances in nanopore sequencing, as well as genome assembly and analysis methods, have made it possible to obtain complete bacterial genomes from metagenomic (i.e., multispecies) samples, including those from the human microbiome. In this study, methods are presented to obtain complete bacterial genomes from human saliva using complementary Oxford Nanopore (ONT) and Illumina sequencing. Applied to 3 human saliva samples, these methods resulted in 11 complete bacterial genomes: 3 Saccharibacteria clade G6 (also known as Ca. Nanogingivalaceae HMT-870), 1 Saccharibacteria clade G1 HMT-348, 2 Rothia mucilaginosa, 2 Actinomyces graevenitzii, 1 Mogibacterium diversum, 1 Lachnospiraceae HMT-096, and 1 Lancefieldella parvula; and one circular chromosome of Ruminococcaceae HMT-075 (which likely has at least 2 chromosomes). The 4 Saccharibacteria genomes, as well as the Actinomyces graevenitzii genomes, represented the first complete genomes from their respective bacterial taxa. 
Aside from the complete genomes, the assemblies contained 147 contigs of over 500,000 bp each and thousands of smaller contigs, together representing a myriad of additional draft genomes including many which are likely nearly complete. The complete genomes enabled highly accurate pangenome analysis, which identified unique and missing features of each genome compared to its closest relatives with complete genomes available in public repositories. These features provide clues as to the lifestyle and ecological role of these bacteria within the human oral microbiota, which will be particularly useful in designing future studies of the taxa that have never been isolated or cultivated. IMPORTANCE Obtaining complete and accurate genomes is crucial to the study of any organism. Previously, obtaining complete genomes of bacteria, including those of the human microbiome, frequently required isolation of the organism, as well as low-throughput, manual sequencing methods to resolve repeat regions. Advancements in long-read sequencing technologies, including Oxford Nanopore (ONT), have made it possible to obtain complete, closed bacterial genomes from metagenomic samples. This study reports methods to obtain complete genomes from the human oral microbiome using complementary ONT and Illumina sequencing of saliva samples. Eleven complete genomes were obtained from 3 human saliva samples, with genomes of Saccharibacteria HMT-870, Saccharibacteria HMT-348, and Actinomyces graevenitzii being the first complete genomes from their respective taxa. 
Obtaining complete bacterial genomes in a high-throughput manner will help illuminate the metabolic and ecological roles of important members of the human microbiota, particularly those that have remained recalcitrant to isolation and cultivation.}, } @article {pmid35991422, year = {2022}, author = {Jia, Y and Pradeep, K and Vance, WH and Zhang, X and Weir, B and Wei, H and Deng, Z and Zhang, Y and Xu, X and Zhao, C and Berger, JD and Bell, RW and Li, C}, title = {Identification of two chickpea multidrug and toxic compound extrusion transporter genes transcriptionally upregulated upon aluminum treatment in root tips.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {909045}, pmid = {35991422}, issn = {1664-462X}, abstract = {Aluminum (Al) toxicity poses a significant challenge for the yield improvement of chickpea, which is an economically important legume crop with high nutritional value in human diets. The genetic basis of Al-tolerance in chickpea remains unclear. Here, we assessed the Al-tolerance of 8 wild Cicer and one cultivated chickpea (PBA Pistol) accessions by measuring the root elongation in solution culture under control (0 μM Al[3+]) and Al treatments (15, 30 μM Al[3+]). Compared to PBA Pistol, the wild Cicer accessions displayed both tolerant and sensitive phenotypes, supporting wild Cicer as a potential genetic pool for Al-tolerance improvement. To identify potential genes related to Al-tolerance in chickpea, genome-wide screening of multidrug and toxic compound extrusion (MATE) encoding genes was performed. Fifty-six MATE genes were identified in total, which can be divided into 4 major phylogenetic groups. Four chickpea MATE genes (CaMATE1-4) were clustered with the previously characterized citrate transporters MtMATE66 and MtMATE69 in Medicago truncatula. Transcriptome data showed that CaMATE1-4 have diverse expression profiles, with CaMATE2 being root-specific. 
qRT-PCR analyses confirmed that CaMATE2 and CaMATE4 were highly expressed in root tips and were up-regulated upon Al treatment in all chickpea lines. Further measurement of carboxylic acids showed that malonic acid, instead of malate or citrate, is the major extruded acid by Cicer spp. root. Protein structural modeling analyses revealed that CaMATE2 has a divergent substrate-binding cavity from Arabidopsis AtFRD3, which may explain the different acid-secretion profile for chickpea. Pangenome survey showed that CaMATE1-4 have much higher genetic diversity in wild Cicer than that in cultivated chickpea. This first identification of CaMATE2 and CaMATE4 responsive to Al[3+] treatment in Cicer paves the way for future functional characterization of MATE genes in Cicer spp., and to facilitate future design of gene-specific markers for Al-tolerant line selection in chickpea breeding programs.}, } @article {pmid35977842, year = {2022}, author = {Zhou, Y and Yang, L and Han, X and Han, J and Hu, Y and Li, F and Xia, H and Peng, L and Boschiero, C and Rosen, BD and Bickhart, DM and Zhang, S and Guo, A and Van Tassell, CP and Smith, TPL and Yang, L and Liu, GE}, title = {Assembly of a pangenome for global cattle reveals missing sequences and novel structural variations, providing new insights into their diversity and evolutionary history.}, journal = {Genome research}, volume = {32}, number = {8}, pages = {1585-1601}, pmid = {35977842}, issn = {1549-5469}, abstract = {A cattle pangenome representation was created based on the genome sequences of 898 cattle representing 57 breeds. The pangenome identified 83 Mb of sequence not found in the cattle reference genome, representing 3.1% novel sequence compared with the 2.71-Gb reference. A catalog of structural variants developed from this cattle population identified 3.3 million deletions, 0.12 million inversions, and 0.18 million duplications. 
Estimates of breed ancestry and hybridization between cattle breeds using insertion/deletions as markers were similar to those produced by single nucleotide polymorphism-based analysis. Hundreds of deletions were observed to have stratification based on subspecies and breed. For example, an insertion of a Bov-tA1 repeat element was identified in the first intron of the APPL2 gene and correlated with cattle breed geographic distribution. This insertion falls within a segment overlapping predicted enhancer and promoter regions of the gene, and could affect important traits such as immune response, olfactory functions, cell proliferation, and glucose metabolism in muscle. The results indicate that pangenomes are a valuable resource for studying diversity and evolutionary history, and help to delineate how domestication, trait-based breeding, and adaptive introgression have shaped the cattle genome.}, } @article {pmid35976181, year = {2022}, author = {Sancho, R and Catalán, P and Contreras-Moreira, B and Juenger, TE and Des Marais, DL}, title = {Patterns of pan-genome occupancy and gene coexpression under water-deficit in Brachypodium distachyon.}, journal = {Molecular ecology}, volume = {31}, number = {20}, pages = {5285-5306}, pmid = {35976181}, issn = {1365-294X}, mesh = {*Brachypodium/genetics ; Droughts ; Genes, Plant ; Transcriptome/genetics ; Water ; }, abstract = {Natural populations are characterized by abundant genetic diversity driven by a range of different types of mutation. The tractability of sequencing complete genomes has allowed new insights into the variable composition of genomes, summarized as a species pan-genome. These analyses demonstrate that many genes are absent from the first reference genomes, whose analysis dominated the initial years of the genomic era. Our field now turns towards understanding the functional consequence of these highly variable genomes. 
Here, we analysed weighted gene coexpression networks from leaf transcriptome data for drought response in the purple false brome Brachypodium distachyon and the differential expression of genes putatively involved in adaptation to this stressor. We specifically asked whether genes with variable "occupancy" in the pan-genome - genes which are either present in all studied genotypes or missing in some genotypes - show different distributions among coexpression modules. Coexpression analysis united genes expressed in drought-stressed plants into nine modules covering 72 hub genes (87 hub isoforms), and genes expressed under controlled water conditions into 13 modules, covering 190 hub genes (251 hub isoforms). We find that low occupancy pan-genes are under-represented among several modules, while other modules are over-enriched for low-occupancy pan-genes. We also provide new insight into the regulation of drought response in B. distachyon, specifically identifying one module with an apparent role in primary metabolism that is strongly responsive to drought. Our work shows the power of integrating pan-genomic analysis with transcriptomic data using factorial experiments to understand the functional genomics of environmental response.}, } @article {pmid35974988, year = {2022}, author = {Haque, F and Jabeen, I and Keya, CA and Shuvo, SR}, title = {Whole-genome sequencing and comparative analysis of heavy metals tolerant Bacillus anthracis FHq strain isolated from tannery effluents in Bangladesh.}, journal = {AIMS microbiology}, volume = {8}, number = {2}, pages = {227-239}, pmid = {35974988}, issn = {2471-1888}, abstract = {Heavy metal contamination of the environment is a primary concern in Bangladesh. This study aims to characterize a novel heavy metal tolerant strain, Bacillus anthracis FHq, isolated from the tannery effluents of Savar, Bangladesh. 
The strain could tolerate up to 5 mM of lead nitrate, 2.5 mM of sodium arsenate, chromium chloride, cobalt chloride, 1.5 mM cadmium acetate, and 1 mM of sodium arsenite. Whole-genome sequencing analysis revealed that the genome of the strain is around 5.2 Mbp long, and the G + C content is 35.4%. Besides, FHq has genes cadC, zntA, arsCR, czcD, and chrA, which confer lead, arsenic, cobalt, and chromium resistance, respectively. A total of nineteen other closely related and completely sequenced B. anthracis strains were selected based on average nucleotide identity along with the FHq strain for phylogenomic and pan-genome analysis. The phylogenomic analysis predicted the inter-genomic evolutionary relationship of the strain isolated from Bangladesh, and it was closely related to a strain isolated from China. Pan-genome analysis revealed that the FHq strain possesses 6045 pan genes, 3802 core genes, and 152 unique genes in its genomic content. Hence, the genetic information and comparative analysis of the FHq strain might facilitate identifying the mechanisms conferring high resistance to lead in B. anthracis strains isolated from Bangladesh.}, } @article {pmid35974327, year = {2022}, author = {Garza, DR and von Meijenfeldt, FAB and van Dijk, B and Boleij, A and Huynen, MA and Dutilh, BE}, title = {Nutrition or nature: using elementary flux modes to disentangle the complex forces shaping prokaryote pan-genomes.}, journal = {BMC ecology and evolution}, volume = {22}, number = {1}, pages = {101}, pmid = {35974327}, issn = {2730-7182}, mesh = {Archaea/genetics ; Bacteria/genetics ; *Evolution, Molecular ; *Genome, Bacterial/genetics ; Genomics ; Humans ; Phylogeny ; Prokaryotic Cells ; }, abstract = {BACKGROUND: Microbial pan-genomes are shaped by a complex combination of stochastic and deterministic forces. Even closely related genomes exhibit extensive variation in their gene content. 
Understanding what drives this variation requires exploring the interactions of gene products with each other and with the organism's external environment. However, to date, conceptual models of pan-genome dynamics often represent genes as independent units and provide limited information about their mechanistic interactions.

RESULTS: We simulated the stochastic process of gene-loss using the pooled genome-scale metabolic reaction networks of 46 taxonomically diverse bacterial and archaeal families as proxies for their pan-genomes. The frequency by which reactions are retained in functional networks when stochastic gene loss is simulated in diverse environments allowed us to disentangle the metabolic reactions whose presence depends on the metabolite composition of the external environment (constrained by "nutrition") from those that are independent of the environment (constrained by "nature"). By comparing the frequency of reactions from the first group with their observed frequencies in bacterial and archaeal families, we predicted the metabolic niches that shaped the genomic composition of these lineages. Moreover, we found that the lineages that were shaped by a more diverse metabolic niche also occur in more diverse biomes as assessed by global environmental sequencing datasets.

CONCLUSION: We introduce a computational framework for analyzing and interpreting pan-reactomes that provides novel insights into the ecological and evolutionary drivers of pan-genome dynamics.}, } @article {pmid35972150, year = {2022}, author = {Wittmers, F and Needham, DM and Hehenberger, E and Giovannoni, SJ and Worden, AZ}, title = {Genomes from Uncultivated Pelagiphages Reveal Multiple Phylogenetic Clades Exhibiting Extensive Auxiliary Metabolic Genes and Cross-Family Multigene Transfers.}, journal = {mSystems}, volume = {7}, number = {5}, pages = {e0152221}, pmid = {35972150}, issn = {2379-5077}, mesh = {Humans ; Phylogeny ; Genome, Viral ; *Bacteriophages ; *Podoviridae ; Bacteria/genetics ; Myoviridae/genetics ; }, abstract = {For the abundant marine Alphaproteobacterium Pelagibacter (SAR11), and other bacteria, phages are powerful forces of mortality. However, little is known about the most abundant Pelagiphages in nature, such as the widespread HTVC023P-type, which is currently represented by two cultured phages. Using viral metagenomic data sets and fluorescence-activated cell sorting, we recovered 80 complete, undescribed Podoviridae genomes that form 10 phylogenomically distinct clades (herein, named Clades I to X) related to the HTVC023P-type. These expanded the HTVC023P-type pan-genome by 15-fold and revealed 41 previously unknown auxiliary metabolic genes (AMGs) in this viral lineage. Numerous instances of partner-AMGs (colocated and involved in related functions) were observed, including partners in nucleotide metabolism, DNA hypermodification, and Curli biogenesis. The Type VIII secretion system (T8SS) responsible for Curli biogenesis was identified in nine genomes and expanded the repertoire of T8SS proteins reported thus far in viruses. 
Additionally, the identified T8SS gene cluster contained an iron-dependent regulator (FecR), as well as a histidine kinase and adenylate cyclase that can be implicated in T8SS function but are not within T8SS operons in bacteria. While T8SS are lacking in known Pelagibacter, they contribute to aggregation and biofilm formation in other bacteria. Phylogenetic reconstructions of partner-AMGs indicate derivation from cellular lineages with a more recent transfer between viral families. For example, homologs of all T8SS genes are present in syntenic regions of distant Myoviridae Pelagiphages, and they appear to have alphaproteobacterial origins with a later transfer between viral families. The results point to an unprecedented multipartner-AMG transfer between marine Myoviridae and Podoviridae. Together with the expansion of known metabolic functions, our studies provide new prospects for understanding the ecology and evolution of marine phages and their hosts. IMPORTANCE One of the most abundant and diverse marine bacterial groups is Pelagibacter. Phages have roles in shaping Pelagibacter ecology; however, several Pelagiphage lineages are represented by only a few genomes. This paucity of data from even the most widespread lineages has imposed limits on the understanding of the diversity of Pelagiphages and their impacts on hosts. Here, we report 80 complete genomes, assembled directly from environmental data, which are from undescribed Pelagiphages and render new insights into the manipulation of host metabolism during infection. Notably, the viruses have functionally related partner genes that appear to be transferred between distant viruses, including a suite that encode a secretion system which both brings a new functional capability to the host and is abundant in phages across the ocean. 
Together, these functions have important implications for phage evolution and for how Pelagiphage infection influences host biology in manners extending beyond canonical viral lysis and mortality.}, } @article {pmid35966654, year = {2022}, author = {Xue, M and Huang, X and Xue, J and He, R and Liang, G and Liang, H and Liu, J and Wen, C}, title = {Comparative Genomic Analysis of Seven Vibrio alginolyticus Strains Isolated From Shrimp Larviculture Water With Emphasis on Chitin Utilization.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {925747}, pmid = {35966654}, issn = {1664-302X}, abstract = {The opportunistic pathogen Vibrio alginolyticus is gaining attention because of its disease-causing risks to aquatic animals and humans. In this study, seven Vibrio strains isolated from different shrimp hatcheries in Southeast China were subjected to genome sequencing and subsequent comparative analysis to explore their intricate relationships with shrimp aquaculture. The seven isolates had an average nucleotide identity of ≥ 98.3% with other known V. alginolyticus strains. The species V. alginolyticus had an open pan-genome, with the addition of ≥ 161 novel genes following each new genome for seven isolates and 14 publicly available V. alginolyticus strains. The percentages of core genes of the seven strains were up to 83.1-87.5%, indicating highly conserved functions, such as chitin utilization. Further, a total of 14 core genes involved in the chitin degradation pathway were detected on the seven genomes with a single copy, 12 of which had undergone significant purifying selection (dN/dS < 1). Moreover, the seven strains could utilize chitin as the sole carbon-nitrogen source. In contrast, mobile genetic elements (MGEs) were identified in seven strains, including plasmids, prophages, and genomic islands, which mainly encoded accessory genes annotated as hypothetical proteins. 
The infection experiment showed that four of the seven strains might be pathogenic because the survival rates of Litopenaeus vannamei postlarvae were significantly reduced (P < 0.05) when compared to the control. However, no obvious correlation was noted between the number of putative virulence factors and toxic effects of the seven strains. Collectively, the persistence of V. alginolyticus in various aquatic environments may be attributed to its high genomic plasticity via the acquisition of novel genes by various MGEs. In view of the strong capability of chitin utilization by diverse vibrios, the timely removal of massive chitin-rich materials thoroughly in shrimp culture systems may be a key strategy to inhibit proliferation of vibrios and subsequent infection of shrimp. In addition, transcontinental transfer of potentially pathogenic V. alginolyticus strains should receive great attention to avoid vibriosis.}, } @article {pmid35964310, year = {2022}, author = {Dmitriev, AA and Pushkova, EN and Melnikova, NV}, title = {[Plant Genome Sequencing: Modern Technologies and Novel Opportunities for Breeding].}, journal = {Molekuliarnaia biologiia}, volume = {56}, number = {4}, pages = {531-545}, doi = {10.31857/S0026898422040048}, pmid = {35964310}, issn = {0026-8984}, mesh = {Base Sequence ; Chromosome Mapping ; *Genome, Plant ; *Plant Breeding ; Plants/genetics ; Sequence Analysis, DNA/methods ; }, abstract = {The investigation of plant genomes is of great importance for basic research and practical breeding. In 1977, F. Sanger proposed a DNA sequencing method, which allowed the complete sequences of a number of genomes to be determined. Then high-throughput and cost-effective next-generation/second-generation sequencing methods, producing up to billions of short reads, made it possible to sequence genomes of a significant number of species and provided a breakthrough in plant genetic studies. 
Finally, third-generation sequencing technologies allowed the determination of single-molecule sequences up to a million nucleotides in length, which is key for high-quality genome assemblies. An important task is to obtain a pan-genome, which includes an entire set of nucleotide sequences presented in various genotypes of the same species. The sequencing of plant genomes made it possible to assess intraspecific polymorphism, identify key genes influencing the formation of significant features, and develop molecular markers of economically valuable traits and this has become the basis for the development of marker-assisted and genomic selection. This review provides information on the latest advances in sequencing technologies and the assembly of plant genomes, as well as the opportunities that they open up for basic and applied works.}, } @article {pmid35958219, year = {2022}, author = {Hu, G and Cheng, L and Cheng, Y and Mao, W and Qiao, Y and Lan, Y}, title = {Pan-genome analysis of three main Chinese chestnut varieties.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {916550}, pmid = {35958219}, issn = {1664-462X}, abstract = {Chinese chestnut (Castanea mollissima Blume) is one of the earliest domesticated plants of high nutritional and ecological value, yet mechanisms of C. mollissima underlying its growth and development are poorly understood. Although individual chestnut species differ greatly, the molecular basis of the formation of their characteristic traits remains unknown. Though the draft genomes of chestnut have been previously released, the pan-genome of different varieties needs to be studied. We report the genome sequence of three cultivated varieties of chestnut herein, namely Hei-Shan-Zhai-7 (H7, drought-resistant variety), Yan-Hong (YH, easy-pruning variety), and Yan-Shan-Zao-Sheng (ZS, early-maturing variety), to expedite convenience and efficiency in its genetics-based breeding. 
We obtained three chromosome-level chestnut genome assemblies through a combination of Oxford Nanopore technology, Illumina HiSeq X, and Hi-C mapping. The final genome assemblies are 671.99 Mb (YH), 790.99 Mb (ZS), and 678.90 Mb (H7), across 12 chromosomes, with scaffold N50 sizes of 50.50 Mb (YH), 65.05 Mb (ZS), and 52.16 Mb (H7). Through the identification of homologous genes and the cluster analysis of gene families, we found that H7, YH and ZS had 159, 131, and 91 unique gene families, respectively, and there were 13,248 single-copy direct homologous genes in the three chestnut varieties. For the convenience of research, the chestnut genome database was constructed. Based on the results of gene family identification, the presence/absence variations (PAVs) information of the three sample genes was calculated, and a total of 2,364, 2,232, and 1,475 unique genes were identified in H7, YH and ZS, respectively. Our results suggest that the GBSS II-b gene family underwent expansion in chestnut (relative to nearest source species). Overall, we developed high-quality and well-annotated genome sequences of three C. mollissima varieties, which will facilitate clarifying the molecular mechanisms underlying important traits, and shortening the breeding process.}, } @article {pmid35956427, year = {2022}, author = {Petereit, J and Bayer, PE and Thomas, WJW and Tay Fernandez, CG and Amas, J and Zhang, Y and Batley, J and Edwards, D}, title = {Pangenomics and Crop Genome Adaptation in a Changing Climate.}, journal = {Plants (Basel, Switzerland)}, volume = {11}, number = {15}, pages = {}, pmid = {35956427}, issn = {2223-7747}, support = {DP210100296//Australian Research Council/ ; DP200100762//Australian Research Council/ ; }, abstract = {During crop domestication and breeding, wild plant species have been shaped into modern high-yield crops and adapted to the main agro-ecological regions. 
However, climate change will impact crop productivity in these regions, and agriculture needs to adapt to support future food production. On a global scale, crop wild relatives grow in more diverse environments than crop species, and so may host genes that could support the adaptation of crops to new and variable environments. Through identification of individuals with increased climate resilience we may gain a greater understanding of the genomic basis for this resilience and transfer this to crops. Pangenome analysis can help to identify the genes underlying stress responses in individuals harbouring untapped genomic diversity in crop wild relatives. The information gained from the analysis of these pangenomes can then be applied towards breeding climate resilience into existing crops or to re-domesticating crops, combining environmental adaptation traits with crop productivity.}, } @article {pmid35945191, year = {2022}, author = {Xia, F and Jiang, M and Wen, Z and Wang, Z and Wang, M and Xu, Y and Zhuge, X and Dai, J}, title = {Complete genomic analysis of ST117 lineage extraintestinal pathogenic Escherichia coli (ExPEC) to reveal multiple genetic determinants to drive its global transmission: ST117 E. 
coli as an emerging multidrug-resistant foodborne ExPEC with zoonotic potential.}, journal = {Transboundary and emerging diseases}, volume = {69}, number = {6}, pages = {3256-3273}, doi = {10.1111/tbed.14678}, pmid = {35945191}, issn = {1865-1682}, support = {CX(21)3126//Jiangsu Agricultural Science and Technology Innovation Fund/ ; 32172855//National Natural Science Foundation of China/ ; }, mesh = {Animals ; Humans ; Escherichia coli/genetics ; *Extraintestinal Pathogenic Escherichia coli ; *Escherichia coli Infections/epidemiology/veterinary ; Birds ; Genomics ; *Poultry Diseases/epidemiology ; Phylogeny ; Chickens ; Virulence Factors/genetics ; }, abstract = {Avian pathogenic Escherichia coli (APEC) is recognized as a primary source of foodborne extraintestinal pathogenic E. coli (ExPEC), which poses a significant risk of extraintestinal infections in humans. The potential of human infection with ST117 lineage APEC/ExPEC from poultry is particularly concerning. However, relatively few whole-genome studies have focused on ST117 as an emerging ExPEC lineage. In this study, the complete genomes of 11 avian ST117 isolates and the draft genomes of 20 ST117 isolates in China were sequenced to reveal the genomic islands and large plasmid composition of ST117 APEC. With reference to the extensive E. coli genomes available in public databases, large-scale comprehensive genomic analysis of the ST117 lineage APEC/ExPEC was performed to reveal the features of the ST117 pan-genome and population. The high variability of the accessory genome emphasized the diversity and dynamic traits of the ST117 pan-genome. ST117 isolates recovered from different hosts and geographic sources were randomly located on a phylogeny tree, suggesting that ST117 E. coli lacked host specificity. A time-scaled phylogeny tree showed that ST117 was a recent E. coli lineage with a relatively short evolutionary period. 
Further characterization of a wide diversity of ExPEC-related virulence genes, pathogenicity islands (PAIs), and resistance genes of the ST117 pan-genome provided insights into the virulence and resistance of ST117 APEC/ExPEC. The results suggested zoonotic potential of ST117 APEC/ExPEC between birds and humans. Moreover, genomic analysis showed that a pool of diverse plasmids drove the virulence and multidrug resistance of ST117 APEC/ExPEC. Several types of large plasmids were scattered across the ST117 isolates, but there was no strong plasmid-clade adaptation. Combined with the pan-genome analysis, a double polymerase chain reaction (PCR) method was designed for rapid and cost-effective detection of ST117 isolates from various avian and human APEC/ExPEC isolates. Overall, this study addressed a gap in current knowledge about the ST117 APEC/ExPEC genome, with significant implications to understand the success and spread of ST117 APEC/ExPEC.}, } @article {pmid35944516, year = {2023}, author = {Goldman, AD and Kaçar, B}, title = {Very early evolution from the perspective of microbial ecology.}, journal = {Environmental microbiology}, volume = {25}, number = {1}, pages = {5-10}, doi = {10.1111/1462-2920.16144}, pmid = {35944516}, issn = {1462-2920}, mesh = {*Evolution, Molecular ; Genome ; Ecology ; Gene Transfer, Horizontal ; *Microbiota ; Phylogeny ; Biological Evolution ; }, abstract = {The universal ancestor at the root of the species tree of life depicts a population of organisms with a surprising degree of complexity, possessing genomes and translation systems much like that of microbial life today. As the first life forms were most likely to have been simple replicators, considerable evolutionary change must have taken place prior to the last universal common ancestor. 
It is often assumed that the lack of earlier branches on the tree of life is due to a prevalence of random horizontal gene transfer that obscured the delineations between lineages and hindered their divergence. Therefore, principles of microbial evolution and ecology may give us some insight into these early stages in the history of life. Here, we synthesize the current understanding of organismal and genome evolution from the perspective of microbial ecology and apply these evolutionary principles to the earliest stages of life on Earth. We focus especially on broad evolutionary modes pertaining to horizontal gene transfer, pangenome structure, and microbial mat communities.}, } @article {pmid35935202, year = {2022}, author = {Camargo, A and Guerrero-Araya, E and Castañeda, S and Vega, L and Cardenas-Alvarez, MX and Rodríguez, C and Paredes-Sabja, D and Ramírez, JD and Muñoz, M}, title = {Intra-species diversity of Clostridium perfringens: A diverse genetic repertoire reveals its pathogenic potential.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {952081}, pmid = {35935202}, issn = {1664-302X}, abstract = {Clostridium perfringens is the causative agent of many enterotoxic diseases in humans and animals, and it is present in diverse environments (soil, food, sewage, and water). Multilocus Sequence Typing (MLST) and Whole Genome Sequencing (WGS) have provided a general approach about genetic diversity of C. perfringens; however, those studies are limited to specific locations and often include a reduced number of genomes. In this study, 372 C. perfringens genomes from multiple locations and sources were used to assess the genetic diversity and phylogenetic relatedness of this pathogen. In silico MLST was used for typing the isolates, and the resulting sequence types (ST) were assigned to clonal complexes (CC) based on allelic profiles that differ from its founder by up to double-locus variants. 
A pangenome analysis was conducted, and a core genome-based phylogenetic tree was created to define phylogenetic groups. Additionally, key virulence factors, toxinotypes, and antibiotic resistance genes were identified using ABRicate against Virulence Factor Database (VFDB), TOXiper, and Resfinder, respectively. The majority of the C. perfringens genomes found in publicly available databases were derived from food (n = 85) and bird (n = 85) isolates. A total of 195 STs, some of them shared between sources such as food and human, horses and dogs, and environment and birds, were grouped in 25 CC and distributed along five phylogenetic groups. Fifty-three percent of the genomes were allocated to toxinotype A, followed by F (32%) and G (7%). The most frequently found virulence factors based on > 70% coverage and 99.95% identity were plc (100%), nanH (99%), ccp (99%), and colA (98%), which encode an alpha-toxin, a sialidase, an alpha-clostripain, and a collagenase, respectively, while tetA (39.5%) and tetB (36.2%), which mediate tetracycline resistance determinants, were the most common antibiotic resistance genes detected. The analyses conducted here showed a better view of the presence of this pathogen across several host species. They also confirm that the genetic diversity of C. perfringens is based on a large number of virulence factors that vary among phylogroups, and antibiotic resistance markers, especially to tetracyclines, aminoglycosides, and macrolides. Those characteristics highlight the importance of C. 
perfringens as one of the most common causes of foodborne illness.}, } @article {pmid35927368, year = {2022}, author = {Sotty, J and Bablon, P and Lekbaby, B and Augustin, J and Girier-Dufournier, M and Langlois, L and Dorival, C and Carrat, F and Pol, S and Fontaine, H and Sarica, N and Neuveut, C and Housset, C and Kremdsorf, D and Schnuriger, A and Soussan, P}, title = {Diversity of the nucleic acid forms of circulating HBV in chronically infected patients and its impact on viral cycle.}, journal = {Hepatology international}, volume = {16}, number = {6}, pages = {1259-1272}, pmid = {35927368}, issn = {1936-0541}, support = {ECTZ 103985//Agence Nationale de Recherches sur le Sida et les Hépatites Virales/ ; ECTZ 163186//Agence Nationale de Recherches sur le Sida et les Hépatites Virales/ ; EQU202003010517//FRM/ ; }, mesh = {Humans ; Hepatitis B virus/genetics ; *Nucleic Acids/therapeutic use ; Prospective Studies ; DNA, Viral/genetics ; *Hepatitis B, Chronic/drug therapy ; Virus Replication ; *Hepatitis B ; RNA ; RNA, Viral/analysis ; }, abstract = {BACKGROUND: Besides the prototypical hepatitis B virus (HBV) infectious particle, which contains a full-length double-stranded DNA (flDNA), additional circulating virus-like particles, which carry pregenomic RNA (pgRNA), spliced1RNA (sp1RNA) or spliced-derived DNA (defDNA) forms have been described. We aimed to determine the level of these four circulating forms in patients and to evaluate their impact on viral lifecycle.

METHODS: Chronic HBV untreated patients (n = 162), included in the HEPATHER cohort, were investigated. Pangenomic qPCRs were set up to quantify the four circulating forms of HBV nucleic acids (HBVnaf). In vitro infection assays were performed to address the impact of HBVnaf.

RESULTS: Hierarchical clustering individualized two clusters of HBVnaf diversity among patients: (1) cluster 1 (C1) showing a predominance of flDNA; (2) cluster 2 (C2) showing various proportions of the different forms. HBeAg-positive chronic hepatitis phase and higher viral load (7.0 ± 6.4 vs 6.6 ± 6.2 Log10 copies/ml; p < 0.001) characterized C2 compared to C1 patients. Among the different HBVnaf, pgRNA was more prevalent in C1 patients with high vs low HBV viral load (22.1% ± 2.5% vs 4.1% ± 1.8% of HBVnaf, p < 0.0001) but remained highly prevalent in C2 patients, whatever the level of replication. C2 patient samples used in infection assays showed that: (1) HBVnaf secretion was independent of the viral strain; (2) the viral cycle efficiency differed according to the proportion of HBVnaf in the inoculum, independently of cccDNA formation. Inoculum enrichment before infection suggests that pgRNA-containing particles drive this impact on viral replication.

CONCLUSION: Besides the critical role of HBV replication in circulating HBVnaf diversity, our data highlight an impact of this diversity on the dynamics of viral cycle.

CLINICAL TRIAL REGISTRATION: Patients were included from a prospective multicenter French national cohort (ANRS CO22 HEPATHER, NCT01953458).}, } @article {pmid35924489, year = {2022}, author = {Meleshko, D and Yang, R and Marks, P and Williams, S and Hajirasouliha, I}, title = {Efficient detection and assembly of non-reference DNA sequences with synthetic long reads.}, journal = {Nucleic acids research}, volume = {50}, number = {18}, pages = {e108}, pmid = {35924489}, issn = {1362-4962}, support = {R35 GM138152/GM/NIGMS NIH HHS/United States ; }, mesh = {Algorithms ; Base Sequence ; *Genome, Human ; *High-Throughput Nucleotide Sequencing/methods ; Humans ; Sequence Analysis, DNA/methods ; }, abstract = {Recent pan-genome studies have revealed an abundance of DNA sequences in human genomes that are not present in the reference genome. A lion's share of these non-reference sequences (NRSs) cannot be reliably assembled or placed on the reference genome. Improvements in long-read and synthetic long-read (aka linked-read) technologies have great potential for the characterization of NRSs. While synthetic long reads require less input DNA than long-read datasets, they are algorithmically more challenging to use. Except for computationally expensive whole-genome assembly methods, there is no synthetic long-read method for NRS detection. We propose a novel integrated alignment-based and local assembly-based algorithm, Novel-X, that uses the barcode information encoded in synthetic long reads to improve the detection of such events without a whole-genome de novo assembly. Our evaluations demonstrate that Novel-X finds many non-reference sequences that cannot be found by state-of-the-art short-read methods. We applied Novel-X to a diverse set of 68 samples from the Polaris HiSeq 4000 PGx cohort. Novel-X discovered 16 691 NRS insertions of size > 300 bp (total length 18.2 Mb). 
Many of them are population specific or may have a functional impact.}, } @article {pmid35916725, year = {2022}, author = {Dereeper, A and Summo, M and Meyer, DF}, title = {PanExplorer: a web-based tool for exploratory analysis and visualization of bacterial pan-genomes.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {18}, pages = {4412-4414}, pmid = {35916725}, issn = {1367-4811}, support = {//European Union on the Guadeloupe Region/ ; 2018-FED-1084//European Research and Development Funds/ ; }, mesh = {*Genome, Bacterial ; Genomics ; Software ; *Libraries ; Internet ; }, abstract = {MOTIVATION: As pan-genome approaches are largely employed for bacterial comparative genomics and evolution analyses, but still difficult to be carried out by non-bioinformatician biologists, there is a need for an innovative tool facilitating the exploration of bacterial pan-genomes.

RESULTS: PanExplorer is a web application providing various genomic analyses and reports, giving intuitive views that enable a better understanding of bacterial pan-genomes. As an example, we produced the pan-genome for 121 Anaplasmataceae strains (including 30 Ehrlichia, 15 Anaplasma, 68 Wolbachia).

PanExplorer is written in Perl CGI and relies on several JavaScript libraries for visualization (hotmap.js, MauveViewer, CircosJS). It is freely available at http://panexplorer.southgreen.fr. The source code has been released in a GitHub repository https://github.com/SouthGreenPlatform/PanExplorer. A documentation section is available on PanExplorer website.}, } @article {pmid35913193, year = {2022}, author = {Zheng, X and Dai, X and Zhu, Y and Yang, J and Jiang, H and Dong, H and Huang, L}, title = {(Meta)Genomic Analysis Reveals Diverse Energy Conservation Strategies Employed by Globally Distributed Gemmatimonadota.}, journal = {mSystems}, volume = {7}, number = {4}, pages = {e0022822}, pmid = {35913193}, issn = {2379-5077}, mesh = {Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Bacteria ; Genomics ; *Microbiota/genetics ; }, abstract = {Gemmatimonadota is a phylum-level lineage distributed widely but rarely reported. Only six representatives of Gemmatimonadota have so far been isolated and cultured in laboratory. The physiology, ecology, and evolutionary history of this phylum remain unknown. The 16S rRNA gene survey of our salt lake and deep-sea sediments, and Earth Microbiome Project (EMP) samples, reveals that Gemmatimonadota exist in diverse environments globally. In this study, we retrieved 17 metagenome-assembled genomes (MAGs) from salt lake sediments (12 MAGs) and deep-sea sediments (5 MAGs). Analysis of these MAGs and the nonredundant MAGs or genomes from public databases reveals Gemmatimonadota can degrade various complex organic substrates, and mainly employ heterotrophic pathways (e.g., glycolysis and tricarboxylic acid [TCA] cycle) for growth via aerobic respiration. And the processes of sufficient energy being stored in glucose through gluconeogenesis, followed by the synthesis of more complex compounds, are prevalent in Gemmatimonadota. 
A highly expandable pangenome for Gemmatimonadota has been observed, which presumably results from their adaptation to thriving in diverse environments. The enrichment of the Na[+]/H[+] antiporter in the SG8-23 order represents their adaptation to salty habitats. Notably, we identified a novel lineage of the SG8-23 order, which is potentially anoxygenic phototrophic. This lineage is not closely related to the phototrophs in the order of Gemmatimonadales. The two orders differ distinctly in the gene organization and phylogenetic relationship of their photosynthesis gene clusters, indicating photosystems in Gemmatimonadota have evolved in two independent routes. IMPORTANCE The phylum Gemmatimonadota is widely distributed in various environments. However, their physiology, ecology and evolutionary history remain unknown, primarily due to the limited cultured isolates and available genomes. We were intrigued to find out how widespread this phylum is, and how it can thrive under diverse conditions. Our results here expand the knowledge of the genetic and metabolic diversity of Gemmatimonadota, and shed light on the diverse energy conservation strategies (i.e., oxidative phosphorylation, substrate phosphorylation, and photosynthetic phosphorylation) responsible for their global distribution. 
Moreover, gene organization and phylogenetic analysis of photosynthesis gene clusters in Gemmatimonadota provide a valuable insight into the evolutionary history of photosynthesis.}, } @article {pmid35910650, year = {2022}, author = {Zhang, Y and Chu, H and Yu, L and He, F and Gao, Y and Tang, L}, title = {Analysis of the Taxonomy, Synteny, and Virulence Factors for Soft Rot Pathogen Pectobacterium aroidearum in Amorphophallus konjac Using Comparative Genomics.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {868709}, pmid = {35910650}, issn = {1664-302X}, abstract = {Bacterial soft rot is a devastating disease for a wide range of crops, vegetables, and ornamental plants including konjac (Amorphophallus konjac). However, the pangenome and genomic plasticity of the konjac soft rot pathogens is little explored. In this study, we reported the complete genome sequences of 11 bacterial isolates that can cause typical soft rot symptoms in konjac by in vitro and in vivo pathogenicity tests. Based on in silico DNA-DNA hybridization, average nucleotide identity and phylogenomic analysis, all 11 isolates were determined to be Pectobacterium aroidearum. In addition, synteny analysis of these genomes revealed considerable chromosomal inversions, one of which is triggered by homologous recombination of ribose operon. Pangenome analysis and COG enrichment analysis showed that the pangenome of P. aroidearum is open and that accessory genes are enriched in replication, recombination, and repair. Variations in type IV secretion system and type VI secretion system were found, while plant cell wall degrading enzymes were conserved. Furthermore, sequence analyses also provided evidence for the presence of a type V secretion system in Pectobacterium. These findings advance our understanding of the pathogenicity determinants, genomic plasticity, and evolution of P. 
aroidearum.}, } @article {pmid35909760, year = {2022}, author = {Wu, J and Xu, XD and Liu, L and Ma, L and Pu, Y and Wang, W and Hua, XY and Song, JM and Liu, K and Lu, G and Fang, Y and Li, X and Sun, W}, title = {A Chromosome Level Genome Assembly of a Winter Turnip Rape (Brassica rapa L.) to Explore the Genetic Basis of Cold Tolerance.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {936958}, pmid = {35909760}, issn = {1664-462X}, abstract = {Winter rapeseed (Brassica rapa L.) is an important overwintering oilseed crop that is widely planted in northwest China and suffers chronic low temperatures in winter. So the cold stress becomes one of the major constraints that limit its production. The currently existing genomes limit the understanding of the cold-tolerant genetic basis of rapeseed. Here we assembled a high-quality long-read genome of B. rapa "Longyou-7" cultivar, which has a cold-tolerant phenotype, and constructed a graph-based pan-genome to detect the structural variations within homologs of currently reported cold-tolerant related genes in the "Longyou-7" genome, which provides an additional elucidation of the cold-tolerant genetic basis of "Longyou-7" cultivar and promotes the development of cold-tolerant breeding in B. 
rapa.}, } @article {pmid35909191, year = {2022}, author = {Aurongzeb, M and Rashid, Y and Habib Ahmed Naqvi, S and Muhammad Talha Malik, H and Kamran Azim, M and Hassan, SS and Yasir, M and Karim, A}, title = {Insights into genome evolution, pan-genome, and phylogenetic implication through mitochondrial genome sequence of Naegleria fowleri species.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {13152}, pmid = {35909191}, issn = {2045-2322}, mesh = {DNA, Mitochondrial/genetics/metabolism ; DNA, Protozoan ; Evolution, Molecular ; *Genome, Mitochondrial/genetics ; *Naegleria/genetics ; *Naegleria fowleri/genetics ; Phylogeny ; }, abstract = {In the current study, we have systematically analysed the mitochondrial DNA (mtDNA) sequence of Naegleria fowleri (N. fowleri) isolate AY27, isolated from Karachi, Pakistan. The N. fowleri isolate AY27 has a circular mtDNA (49,541 bp), which harbours 69 genes (46 protein-coding genes, 21 tRNAs and 2 rRNAs). The pan-genome analysis of N. fowleri species showed a Bpan value of 0.137048, which implies that the pan-genome is open. KEGG classified core, accessory and unique gene clusters for human disease, metabolism, environmental information processing, genetic information processing and organismal system. Similarly, COG characterization of protein showed that core and accessory genes are involved in metabolism, information storage and processing, and cellular processes and signaling. The Naegleria species (n = 6) formed a total of 47 gene clusters; 42 single-copy gene clusters and 5 orthologous gene clusters. It was noted that 100% genes of Naegleria species were present in the orthogroups. We identified 44 single nucleotide polymorphisms (SNP) in the N. fowleri isolate AY27 mtDNA using N. fowleri strain V511 as a reference. Whole mtDNA phylogenetic tree analysis showed that N. fowleri isolate AY27 is closely related to N. fowleri (Accession no. JX174181.1). 
The ANI (Average Nucleotide Identity) values presented a much clearer grouping of the Naegleria species compared to the whole mtDNA based phylogenetic analysis. The current study gives a comprehensive understanding of mtDNA architecture as well as a comparison of Naegleria species (N. fowleri and N. gruberi species) at the mitochondrial genome sequence level.}, } @article {pmid35902069, year = {2022}, author = {Singh, PK and Rawal, HC and Panda, AK and Roy, J and Mondal, TK and Sharma, TR}, title = {Pan-genomic, transcriptomic, and miRNA analyses to decipher genetic diversity and anthocyanin pathway genes among the traditional rice landraces.}, journal = {Genomics}, volume = {114}, number = {5}, pages = {110436}, doi = {10.1016/j.ygeno.2022.110436}, pmid = {35902069}, issn = {1089-8646}, mesh = {Anthocyanins ; Gene Expression Regulation, Plant ; Genetic Variation ; Genomics ; India ; *MicroRNAs/genetics/metabolism ; *Oryza/genetics/metabolism ; Transcriptome ; }, abstract = {Black rice is famous for containing high anthocyanin while Joha rice is aromatic with low anthocyanin containing rice from the North-Eastern Region (NER) of India. However, there are limited reports on the anthocyanin biosynthesis in Manipur Black rice. Therefore, the present study was aimed to understand the origin, domestication and anthocyanin biosynthesis pathways in Black rice using the next generation sequencing approaches. With the sequencing data, various analyses were carried out for differential expression and construction of a pan-genome. Protein coding RNA and small RNA sequencing analysis aided in determining 7415 and 131 differentially expressed transcripts and miRNAs, respectively in NER rice. This is the first extensive study on identification and expression analysis of miRNAs and their target genes in regulating anthocyanin biosynthesis in NER rice. 
This study will aid in better understanding for decoding the theory of high or low anthocyanin content in different rice genotypes.}, } @article {pmid35893567, year = {2022}, author = {Hernández-Mendoza, A and Salgado-Morales, R and Morán-Vázquez, A and López-Torres, D and García-Gómez, BI and Dantán-González, E}, title = {Molecular Characterization of pBOq-IncQ and pBOq-95LK Plasmids of Escherichia coli BOq 01, a New Isolated Strain from Poultry Farming, Involved in Antibiotic Resistance.}, journal = {Microorganisms}, volume = {10}, number = {8}, pages = {}, pmid = {35893567}, issn = {2076-2607}, support = {CONACyT PDCPN 0247780//Consejo Nacional de Ciencia y Tecnología/ ; }, abstract = {The increase in antimicrobial resistance has raised questions about how to use these drugs safely, especially in veterinary medicine, animal nutrition, and agriculture. Escherichia coli is an important human and animal pathogen that frequently contains plasmids carrying antibiotic resistance genes. Extra chromosomal elements are required for various functions or conditions in microorganisms. Several phage-like plasmids have been identified, which are important in antibiotic resistance. In this work, the molecular characterization of the pBOq-IncQ (4.5 kb) and pBOq-95LK (95 kb) plasmids found in the E. coli strain BOq 01, a multidrug resistant bacteria isolated from a poultry farm, are considered. Plasmid pBOq-IncQ belongs to the incQ incompatibility plasmid family and is involved in sulfonamide resistance. Plasmid pBOq-95LK is a lytic phage-like plasmid that is involved in the lysis of the E. coli BOq 01 strain and carries a bleomycin resistance gene and a strain cured of this plasmid shows bleomycin sensitivity. Induction of the lytic cycle indicates that this phage-like plasmid is an active phage. This type of plasmid has been reported to acquire genes such as mcr-1, which codes for colistin resistance and bacterial persistence and is a significant public health threat. 
A genome comparison, a pangenomic and phylogenomic analysis with other phage-like plasmids reported in the literature were performed to understand better the evolution of this kind of plasmid in bacteria and its potential importance in antibiotic resistance.}, } @article {pmid35891291, year = {2022}, author = {Alshabrmi, FM and Alrumaihi, F and Alrasheedi, SF and Al-Megrin, WAI and Almatroudi, A and Allemailem, KS}, title = {An In-Silico Investigation to Design a Multi-Epitopes Vaccine against Multi-Drug Resistant Hafnia alvei.}, journal = {Vaccines}, volume = {10}, number = {7}, pages = {}, pmid = {35891291}, issn = {2076-393X}, abstract = {Antimicrobial resistance has become a significant health issue because of the misuse of antibiotics in our daily lives, resulting in high rates of morbidity and mortality. Hafnia alvei is a rod-shaped, Gram-negative and facultative anaerobic bacteria. The medical community has emphasized H. alvei's possible association with gastroenteritis. As of now, there is no licensed vaccine for H. alvei, and as such, computer aided vaccine design approaches could be an ideal approach to highlight the potential vaccine epitopes against this bacteria. By using bacterial pan-genome analysis (BPGA), we were able to study the entire proteomes of H. alvei with the aim of developing a vaccine. Based on the analysis, 20,370 proteins were identified as core proteins, which were further used in identifying potential vaccine targets based on several vaccine candidacy parameters. 
The prioritized vaccine targets against the bacteria are; type 1 fimbrial protein, flagellar hook length control protein (FliK), flagellar hook associated protein (FlgK), curli production assembly/transport protein (CsgF), fimbria/pilus outer membrane usher protein, fimbria/pilus outer membrane usher protein, molecular chaperone, flagellar filament capping protein (FliD), TonB-dependent hemoglobin /transferrin/lactoferrin family receptor, Porin (OmpA), flagellar basal body rod protein (FlgF) and flagellar hook-basal body complex protein (FliE). During the epitope prediction phase, different antigenic, immunogenic, non-Allergenic, and non-Toxic epitopes were predicted for the above-mentioned proteins. The selected epitopes were combined to generate a multi-epitope vaccine construct and a cholera toxin B subunit (adjuvant) was added to enhance the vaccine's antigenicity. Downward analyses of vaccines were performed using a vaccine three-dimensional model. Docking studies have confirmed that the vaccine strongly binds with MHC-I, MHC-II, and TLR-4 immune cell receptors. Additionally, molecular dynamics simulations confirmed that the vaccine epitopes were exposed to nature and to the host immune system and interpreted strong intermolecular binding between the vaccine and receptors. 
Based on the results of the study, the model vaccine construct seems to have the capacity to produce protective immune responses in the host, making it an attractive candidate for further in vitro and in vivo studies.}, } @article {pmid35889115, year = {2022}, author = {Jang, H and Eshwar, A and Lehner, A and Gangiredla, J and Patel, IR and Beaubrun, JJ and Chase, HR and Negrete, F and Finkelstein, S and Weinstein, LM and Ko, K and Addy, N and Ewing, L and Woo, J and Lee, Y and Seo, K and Jaradat, Z and Srikumar, S and Fanning, S and Stephan, R and Tall, BD and Gopinath, GR}, title = {Characterization of Cronobacter sakazakii Strains Originating from Plant-Origin Foods Using Comparative Genomic Analyses and Zebrafish Infectivity Studies.}, journal = {Microorganisms}, volume = {10}, number = {7}, pages = {}, pmid = {35889115}, issn = {2076-2607}, abstract = {Cronobacter sakazakii continues to be isolated from ready-to-eat fresh and frozen produce, flours, dairy powders, cereals, nuts, and spices, in addition to the conventional sources of powdered infant formulae (PIF) and PIF production environments. To understand the sequence diversity, phylogenetic relationship, and virulence of C. sakazakii originating from plant-origin foods, comparative molecular and genomic analyses, and zebrafish infection (ZI) studies were applied to 88 strains. Whole genome sequences of the strains were generated for detailed bioinformatic analysis. PCR analysis showed that all strains possessed a pESA3-like virulence plasmid similar to reference C. sakazakii clinical strain BAA-894. Core genome analysis confirmed a shared genomic backbone with other C. sakazakii strains from food, clinical and environmental strains. Emerging nucleotide diversity in these plant-origin strains was highlighted using single nucleotide polymorphic alleles in 2000 core genes. 
DNA hybridization analyses using a pan-genomic microarray showed that these strains clustered according to sequence types (STs) identified by multi-locus sequence typing (MLST). PHASTER analysis identified 185 intact prophage gene clusters encompassing 22 different prophages, including three intact Cronobacter prophages: ENT47670, ENT39118, and phiES15. AMRFinderPlus analysis identified the CSA family class C β-lactamase gene in all strains and a plasmid-borne mcr-9.1 gene was identified in three strains. ZI studies showed that some plant-origin C. sakazakii display virulence comparable to clinical strains. Finding virulent plant-origin C. sakazakii possessing significant genomic features of clinically relevant STs suggests that these foods can serve as potential transmission vehicles and supports widening the scope of continued surveillance for this important foodborne pathogen.}, } @article {pmid35889095, year = {2022}, author = {Melo-Bolívar, JF and Ruiz Pardo, RY and Junca, H and Sidjabat, HE and Cano-Lozano, JA and Villamil Díaz, LM}, title = {Competitive Exclusion Bacterial Culture Derived from the Gut Microbiome of Nile Tilapia (Oreochromis niloticus) as a Resource to Efficiently Recover Probiotic Strains: Taxonomic, Genomic, and Functional Proof of Concept.}, journal = {Microorganisms}, volume = {10}, number = {7}, pages = {}, pmid = {35889095}, issn = {2076-2607}, support = {ING181-2016//Universidad de La Sabana/ ; INGPHD-6-2017//Universidad de La Sabana/ ; INGPHD-9-2019//Universidad de La Sabana/ ; 808-2018-contract CT 329-2019//MINCIENCIAS- Patrimonio Autónomo Fondo Nacional de Financiamiento para la Ciencia, la Tecnología y la Innovación Francisco José de Caldas/ ; 727-2015, contract CT 122-2017//MINCIENCIAS/ ; }, abstract = {This study aims to mine a previously developed continuous-flow competitive exclusion culture (CFCEC) originating from the Tilapia gut microbiome as a rational and efficient autochthonous probiotic strain recovery source. 
Three isolated strains were tested on their adaptability to host gastrointestinal conditions, their antibacterial activities against aquaculture bacterial pathogens, and their antibiotic susceptibility patterns. Their genomes were fully sequenced, assembled, annotated, and relevant functions inferred, such as those related to pinpointed probiotic activities and phylogenomic comparative analyses to the closer reported strains/species relatives. The strains are possible candidates of novel genus/species taxa inside Lactococcus spp. and Priestia spp. (previously known as Bacillus spp.) These results were consistent with reports on strains inside these phyla exhibiting probiotic features, and the strains we found are expanding their known diversity. Furthermore, their pangenomes showed that these bacteria have indeed a set of so far uncharacterized genes that may play a role in the antagonism to competing strains or specific symbiotic adaptations to the fish host. In conclusion, CFCEC proved to effectively allow the enrichment and further pure culture isolation of strains with probiotic potential.}, } @article {pmid35889069, year = {2022}, author = {Bukhari, SAR and Irfan, M and Ahmad, I and Chen, L}, title = {Comparative Genomics and Pan-Genome Driven Prediction of a Reduced Genome of Akkermansia muciniphila.}, journal = {Microorganisms}, volume = {10}, number = {7}, pages = {}, pmid = {35889069}, issn = {2076-2607}, support = {RGP.02-87-43//King Khalid University/ ; 31572150//National Natural Science Foundation of China/ ; }, abstract = {Akkermansia muciniphila imparts important health benefits and is considered a next-generation probiotic. It is imperative to understand the genomic diversity and metabolic potential of the species for safer applications as probiotics. As it resides with both health-promoting and pathogenic bacteria, understanding the evolutionary patterns is crucial, but this area remains largely unexplored. 
Moreover, pan-genome has previously been established based on only a limited number of strains and without careful strain selection. The pan-genomics have become very important for understanding species diversity and evolution. In the current study, a systematic approach was used to find a refined pan-genome profile of A. muciniphila by excluding too-diverse strains based on average nucleotide identity-based species demarcation. The strains were divided into four phylogroups using a variety of clustering techniques. Horizontal gene transfer and recombination patterns were also elucidated. Evolutionary patterns revealed that different phylogroups were expanding differently. Furthermore, a comparative evaluation of the metabolic potential of the pan-genome and its subsections was performed. Lastly, the study combines functional annotation, persistent genome, and essential genes to devise an approach to determine a minimal genome that can systematically remove unwanted genes, including virulent factors. The selection of one strain to be used as a chassis for the prediction of a reduced genome was very carefully performed by analyzing several genomic parameters, including the number of unique genes and the resistance and pathogenic potential of the strains. 
The strategy could be applied to other microbes, including human-associated microbiota, towards a common goal of predicting a minimal or a reduced genome.}, } @article {pmid35887414, year = {2022}, author = {Maphosa, MN and Steenkamp, ET and Kanzi, AM and van Wyk, S and De Vos, L and Santana, QC and Duong, TA and Wingfield, BD}, title = {Intra-Species Genomic Variation in the Pine Pathogen Fusarium circinatum.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {7}, pages = {}, pmid = {35887414}, issn = {2309-608X}, support = {98353//South African Department of Science and Innovation's South African Research Chair Initiative/ ; 40945//DSI-NRF Centre of Excellence in Plant Health Biotechnology/ ; }, abstract = {Fusarium circinatum is an important global pathogen of pine trees. Genome plasticity has been observed in different isolates of the fungus, but no genome comparisons are available. To address this gap, we sequenced and assembled to chromosome level five isolates of F. circinatum. These genomes were analysed together with previously published genomes of F. circinatum isolates, FSP34 and KS17. Multi-sample variant calling identified a total of 461,683 micro variants (SNPs and small indels) and a total of 1828 macro structural variants of which 1717 were copy number variants and 111 were inversions. The variant density was higher on the sub-telomeric regions of chromosomes. Variant annotation revealed that genes involved in transcription, transport, metabolism and transmembrane proteins were overrepresented in gene sets that were affected by high impact variants. A core genome representing genomic elements that were conserved in all the isolates and a non-redundant pangenome representing all genomic elements is presented. Whole genome alignments showed that an average of 93% of the genomic elements were present in all isolates. 
The results of this study reveal that some genomic elements are not conserved within the isolates and some variants are high impact. The described genome-scale variations will help to inform novel disease management strategies against the pathogen.}, } @article {pmid35887246, year = {2022}, author = {Li, G and Shu, J and Jin, J and Shu, J and Feng, H and Chen, J and He, Y}, title = {Development of a Multi-Epitope Vaccine for Mycoplasma hyopneumoniae and Evaluation of Its Immune Responses in Mice and Piglets.}, journal = {International journal of molecular sciences}, volume = {23}, number = {14}, pages = {}, pmid = {35887246}, issn = {1422-0067}, support = {2022SNJF060, 2022C02031, LGF21C080001, LGC21C180001//This work was funded by Agricultural science and technology cooperation program of Zhejiang Province [2022SNJF060], Key Research and Development Program of Zhejiang Province [2022C02031], and Zhejiang Province Public Welfare Technology Application researc/ ; }, mesh = {Animals ; Bacterial Vaccines ; Epitopes ; Escherichia coli ; Immunity, Cellular ; Immunodominant Epitopes ; *Mycoplasma hyopneumoniae/genetics ; *Pneumonia of Swine, Mycoplasmal/prevention & control ; Swine ; }, abstract = {Mycoplasma hyopneumoniae (Mhp), the primary pathogen causing Mycoplasma pneumonia of swine (MPS), brings massive economic losses worldwide. Genomic variability and post-translational protein modification can enhance the immune evasion of Mhp, which makes MPS prone to recurrent outbreaks on farms, even with vaccination or other treatments. The reverse vaccinology pipeline has been developed as an attractive potential method for vaccine development due to its high efficiency and applicability. In this study, a multi-epitope vaccine for Mhp was developed, and its immune responses were evaluated in mice and piglets. 
Genomic core proteins of Mhp were retrieved through pan-genome analysis, and four immunodominant antigens were screened by host homologous protein removal, membrane protein screening, and virulence factor identification. One immunodominant antigen, AAV27984.1 (membrane nuclease), was expressed by E. coli and named rMhp597. For epitope prioritization, 35 B-cell-derived epitopes were identified from the four immunodominant antigens, and 10 MHC-I and 6 MHC-II binding epitopes were further identified. The MHC-I/II binding epitopes were merged and combined to produce recombinant proteins MhpMEV and MhpMEVC6His, which were used for animal immunization and structural analysis, respectively. Immunization of mice and piglets demonstrated that MhpMEV could induce humoral and cellular immune responses. The mouse serum antibodies could detect all 11 synthetic epitopes, and the piglet antiserum suppressed the nuclease activity of rMhp597. Moreover, piglet serum antibodies could also detect cultured Mhp strain 168. 
In summary, this study provides immunoassay results for a multi-epitope vaccine derived from the reverse vaccinology pipeline, and offers an alternative vaccine for MPS.}, } @article {pmid35886259, year = {2022}, author = {Rida, T and Ahmad, S and Ullah, A and Ismail, S and Tahir Ul Qamar, M and Afsheen, Z and Khurram, M and Saqib Ishaq, M and Alkhathami, AG and Alatawi, EA and Alrumaihi, F and Allemailem, KS}, title = {Pan-Genome Analysis of Oral Bacterial Pathogens to Predict a Potential Novel Multi-Epitopes Vaccine Candidate.}, journal = {International journal of environmental research and public health}, volume = {19}, number = {14}, pages = {}, pmid = {35886259}, issn = {1660-4601}, mesh = {Base Composition ; *Computational Biology/methods ; *Epitopes, T-Lymphocyte/chemistry/genetics ; Molecular Docking Simulation ; Phylogeny ; RNA, Ribosomal, 16S ; Sequence Analysis, DNA ; Vaccines, Subunit/genetics ; }, abstract = {Porphyromonas gingivalis is a Gram-negative anaerobic bacterium, mainly present in the oral cavity and causes periodontal infections. Currently, no licensed vaccine is available against P. gingivalis and other oral bacterial pathogens. To develop a vaccine against P. gingivalis, herein, we applied a bacterial pan-genome analysis (BPGA) on the bacterial genomes that retrieved a total number of 4908 core proteins, which were further utilized for the identification of good vaccine candidates. After several vaccine candidacy analyses, three proteins, namely lytic transglycosylase domain-containing protein, FKBP-type peptidyl-prolyl cis-trans isomerase and superoxide dismutase, were shortlisted for epitopes prediction. In the epitopes prediction phase, different types of B and T-cell epitopes were predicted and only those with an antigenic, immunogenic, non-allergenic, and non-toxic profile were selected. 
Moreover, all the predicted epitopes were joined with each other to make a multi-epitopes vaccine construct, which was linked further to the cholera toxin B-subunit to enhance the antigenicity of the vaccine. For downward analysis, a three dimensional structure of the designed vaccine was modeled. The modeled structure was checked for binding potency with major histocompatibility complex I (MHC-I), major histocompatibility complex II (MHC-II), and Toll-like receptor 4 (TLR-4) immune cell receptors which revealed that the designed vaccine performed proper binding with respect to immune cell receptors. Additionally, the binding efficacy of the vaccine was validated through a molecular dynamic simulation that interpreted strong intermolecular vaccine-receptor binding and confirmed the exposed situation of vaccine epitopes to the host immune system. In conclusion, the study suggested that the model vaccine construct has the potency to generate protective host immune responses and that it might be a good vaccine candidate for experimental in vivo and in vitro studies.}, } @article {pmid35884840, year = {2022}, author = {Ezzeroug Ezzraimi, A and Hannachi, N and Mariotti, A and Rolland, C and Levasseur, A and Baron, SA and Rolain, JM and Camoin-Jau, L}, title = {The Antibacterial Effect of Platelets on Escherichia coli Strains.}, journal = {Biomedicines}, volume = {10}, number = {7}, pages = {}, pmid = {35884840}, issn = {2227-9059}, abstract = {Platelets play an important role in defense against pathogens; however, the interaction between Escherichia coli and platelets has not been well described and detailed. Our goal was to study the interaction between platelets and selected strains of E. coli in order to evaluate the antibacterial effect of platelets and to assess bacterial effects on platelet activation. 
Washed platelets and supernatants of pre-activated platelets were incubated with five clinical colistin-resistant and five laboratory colistin-sensitive strains of E. coli in order to study bacterial growth. Platelet activation was measured with flow cytometry by evaluating CD62P expression. To identify the difference in strain behavior toward platelets, a pangenome analysis using Roary and O-antigen serotyping was carried out. Both whole platelets and the supernatant of activated platelets inhibited growth of three laboratory colistin-sensitive strains. In contrast, platelets promoted growth of the other strains. There was a negative correlation between platelet activation and bacterial growth. The Roary results showed no logical clustering to explain the mechanism of platelet resistance. The diversity of the responses might be due to strains of different types of O-antigen. Our results show a bidirectional interaction between platelets and E. coli whose expression is dependent on the bacterial strain involved.}, } @article {pmid35879566, year = {2022}, author = {Suraby, EJ and Sruthi, KB and Antony, G}, title = {Genome-wide identification of type III effectors and other virulence factors in Ralstonia pseudosolanacearum causing bacterial wilt in ginger (Zingiber officinale).}, journal = {Molecular genetics and genomics : MGG}, volume = {297}, number = {5}, pages = {1371--1388}, pmid = {35879566}, issn = {1617-4623}, support = {PDF/2016/003228/LS//Science and Engineering Research Board/ ; }, mesh = {*Ginger ; Phylogeny ; Plant Diseases ; Ralstonia ; *Ralstonia solanacearum ; Virulence Factors ; }, abstract = {Ralstonia pseudosolanacearum causes bacterial wilt in ginger, reducing ginger production worldwide. We sequenced the whole genome of a highly virulent phylotype I, race 4, biovar 3 Ralstonia pseudosolanacearum strain GRsMep isolated from a severely infected ginger field in India. R. 
pseudosolanacearum GRsMep genome is organised into two replicons: chromosome and megaplasmid with a total genome size of 5,810,605 bp. This strain encodes approximately 72 effectors which include a combination of core effectors as well as highly variable, diverse repertoire of type III effectors. Comparative genome analysis with GMI1000 identified conservation in the genes involved in the general virulence mechanism. Our analysis identified type III effectors, RipBJ and RipBO as present in GRsMep but absent in the reported genomes of other strains infecting Zingiberaceae family. GRsMep contains 126 unique genes when compared to the pangenome of the Ralstonia strains that infect the Zingiberaceae family. The whole-genome data of R. pseudosolanacearum strain will serve as a resource for exploring the evolutionary processes that structure and regulate the virulence determinants of the strain. Pathogenicity testing of the transposon insertional mutant library of GRsMep through virulence assay on ginger plants identified a few candidate virulence determinants specific to bacterial wilt in ginger.}, } @article {pmid35879467, year = {2022}, author = {Dang, VH and Hill, CB and Zhang, XQ and Angessa, TT and McFawn, LA and Li, C}, title = {Multi-locus genome-wide association studies reveal novel alleles for flowering time under vernalisation and extended photoperiod in a barley MAGIC population.}, journal = {TAG. Theoretical and applied genetics. 
Theoretische und angewandte Genetik}, volume = {135}, number = {9}, pages = {3087--3102}, pmid = {35879467}, issn = {1432-2242}, support = {ITFS//Murdoch University/ ; MSS//Murdoch University/ ; }, mesh = {Alleles ; Edible Grain/genetics ; Genetic Markers ; *Genome-Wide Association Study ; *Hordeum/genetics ; Phenotype ; Photoperiod ; Plant Breeding ; Quantitative Trait Loci ; }, abstract = {Key genes controlling flowering and interactions of different photoperiod alleles with various environments were identified in a barley MAGIC population. A new candidate gene for vernalisation requirements was also detected. Optimal flowering time has a major impact on grain yield in crop species, including the globally important temperate cereal crop barley (Hordeum vulgare L.). Understanding the genetics of flowering is a key avenue to enhancing yield potential. Although bi-parental populations were used intensively to map genes controlling flowering, their lack of genetic diversity requires additional work to obtain desired gene combinations in the selected lines, especially when the two parental cultivars did not carry the genes. Multi-parent mapping populations, which use a combination of four or eight parental cultivars, have higher genetic and phenotypic diversity and can provide novel genetic combinations that cannot be achieved using bi-parental populations. This study uses a Multi-parent advanced generation intercross (MAGIC) population from four commercial barley cultivars to identify genes controlling flowering time in different environmental conditions. Genome-wide association studies (GWAS) were performed using 5,112 high-quality markers from Diversity Arrays Technology sequencing (DArT-seq), and Kompetitive allele-specific polymerase chain reaction (KASP) genetic markers were developed. Phenotypic data were collected from fifteen different field trials for three consecutive years. 
Planting was conducted at various sowing times, and plants were grown with/without additional vernalisation and extended photoperiod treatments. This study detected fourteen stable regions associated with flowering time across multiple environments. GWAS combined with pangenome data highlighted the role of CEN gene in flowering and enabled the prediction of different CEN alleles from parental lines. As the founder lines of the multi-parental population are elite germplasm, the favourable alleles identified in this study are directly relevant to breeding, increasing the efficiency of subsequent breeding strategies and offering better grain yield and adaptation to growing conditions.}, } @article {pmid35873727, year = {2022}, author = {Wang, Z and Yang, J and Cheng, F and Li, P and Xin, X and Wang, W and Yu, Y and Zhang, D and Zhao, X and Yu, S and Zhang, F and Dong, Y and Su, T}, title = {Subgenome dominance and its evolutionary implications in crop domestication and breeding.}, journal = {Horticulture research}, volume = {9}, number = {}, pages = {uhac090}, pmid = {35873727}, issn = {2662-6810}, abstract = {Polyploidization or whole-genome duplication (WGD) is a well-known speciation and adaptation mechanism in angiosperms, while subgenome dominance is a crucial phenomenon in allopolyploids, established following polyploidization. The dominant subgenomes contribute more to genome evolution and homoeolog expression bias, both of which confer advantages for short-term phenotypic adaptation and long-term domestication. In this review, we firstly summarize the probable mechanistic basis for subgenome dominance, including the effects of genetic [transposon, genetic incompatibility, and homoeologous exchange (HE)], epigenetic (DNA methylation and histone modification), and developmental and environmental factors on this evolutionary process. We then move to Brassica rapa, a typical allopolyploid with subgenome dominance. Polyploidization provides the B. 
rapa genome not only with the genomic plasticity for adapting to changeable environments, but also an abundant genetic basis for morphological variation, making it a representative species for subgenome dominance studies. According to the 'two-step theory', B. rapa experienced genome fractionation twice during WGD, in which most of the genes responding to the environmental cues and phytohormones were over-retained, enhancing subgenome dominance and consequent adaption. More than this, the pangenome of 18 B. rapa accessions with different morphotypes recently constructed provides further evidence to reveal the impacts of polyploidization and subgenome dominance on intraspecific diversification in B. rapa. Above and beyond the fundamental understanding of WGD and subgenome dominance in B. rapa and other plants, however, it remains elusive why subgenome dominance has tissue- and spatiotemporal-specific features and could shuffle between homoeologous regions of different subgenomes by environments in allopolyploids. We lastly propose acceleration of the combined application of resynthesized allopolyploids, omics technology, and genome editing tools to deepen mechanistic investigations of subgenome dominance, both genetic and epigenetic, in a variety of species and environments. 
We believe that the implications of genomic and genetic basis of a variety of ecologically, evolutionarily, and agriculturally interesting traits coupled with subgenome dominance will be uncovered and aid in making new discoveries and crop breeding.}, } @article {pmid35873140, year = {2022}, author = {Kopf, A and Bunk, B and Coldewey, SM and Gunzer, F and Riedel, T and Schröttner, P}, title = {Comparative Genomic Analysis of the Human Pathogen Wohlfahrtiimonas Chitiniclastica Provides Insight Into the Identification of Antimicrobial Resistance Genotypes and Potential Virulence Traits.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {912427}, pmid = {35873140}, issn = {2235-2988}, mesh = {*Anti-Bacterial Agents/pharmacology/therapeutic use ; *Drug Resistance, Bacterial/genetics ; Gammaproteobacteria ; Genomics ; Genotype ; Humans ; Macrolides ; Virulence/genetics ; }, abstract = {Recent studies suggest that Wohlfahrtiimonas chitiniclastica may be the cause of several diseases in humans including sepsis and bacteremia making the bacterium as a previously underappreciated human pathogen. However, very little is known about the pathogenicity and genetic potential of W. chitiniclastica; therefore, it is necessary to conduct systematic studies to gain a deeper understanding of its virulence characteristics and treatment options. In this study, the entire genetic repertoire of all publicly available W. chitiniclastica genomes was examined including in silico characterization of bacteriophage content, antibiotic resistome, and putative virulence profile. The pan-genome of W. chitiniclastica comprises 3819 genes with 1622 core genes (43%) indicating a putative metabolic conserved species. Furthermore, in silico analysis indicated presumed resistome expansion as defined by the presence of genome-encoded transposons and bacteriophages. 
While macrolide resistance genes macA and macB are located within the core genome, additional antimicrobial resistance genotypes for tetracycline (tetH, tetB, and tetD), aminoglycosides (ant(2'')-Ia, aac(6')-Ia,aph(3'')-Ib, aph(3')-Ia, and aph(6)-Id)), sulfonamide (sul2), streptomycin (strA), chloramphenicol (cat3), and beta-lactamase (blaVEB) are distributed among the accessory genome. Notably, our data indicate that the type strain DSM 18708[T] does not encode any additional clinically relevant antibiotic resistance genes, whereas drug resistance is increasing within the W. chitiniclastica clade. This trend should be monitored with caution. To the best of our knowledge, this is the first comprehensive genome analysis of this species, providing new insights into the genome of this opportunistic human pathogen.}, } @article {pmid35865929, year = {2022}, author = {Li, Y and Wang, Y and Liu, J}, title = {Genomic Insights Into the Interspecific Diversity and Evolution of Mobiluncus, a Pathogen Associated With Bacterial Vaginosis.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {939406}, pmid = {35865929}, issn = {1664-302X}, abstract = {Bacterial vaginosis (BV) is a common vaginal infection and has been associated with increased risk for a wide array of health issues. BV is linked with a variety of heterogeneous pathogenic anaerobic bacteria, among which Mobiluncus is strongly associated with BV diagnosis. However, their genetic features, pathogenicity, interspecific diversity, and evolutionary characters have not been illustrated at genomic level. The current study performed phylogenomic and comparative genomic analyses of Mobiluncus. Phylogenomic analyses revealed remarkable phylogenetic distinctions among different species. Compared with M. curtisii, M. mulieris had a larger genome and pangenome size with more insertion sequences but less CRISPR-Cas systems. 
In addition, these two species were diverse in profile of virulence factors, but harbored similar antibiotic resistance genes. Statistically different functional genome profiles between strains from the two species were determined, as well as correlations of some functional genes/pathways with putative pathogenicity. We also showed that high levels of horizontal gene transfer might be an important strategy for species diversification and pathogenicity. Collectively, this study provides the first genome sequence level description of Mobiluncus, and may shed light on its virulence/pathogenicity, functional diversification, and evolutionary dynamics. Our study could facilitate the further investigations of this important pathogen, and might improve the future treatment of BV.}, } @article {pmid35863717, year = {2022}, author = {Dindhoria, K and Kumar, S and Baliyan, N and Raphel, S and Halami, PM and Kumar, R}, title = {Bacillus licheniformis MCC 2514 genome sequencing and functional annotation for providing genetic evidence for probiotic gut adhesion properties and its applicability as a bio-preservative agent.}, journal = {Gene}, volume = {840}, number = {}, pages = {146744}, doi = {10.1016/j.gene.2022.146744}, pmid = {35863717}, issn = {1879-0038}, mesh = {*Bacillus licheniformis/genetics/metabolism ; Bacteria/genetics ; *Bacteriocins/genetics/metabolism ; Genome, Bacterial ; *Probiotics ; }, abstract = {Bacillus licheniformis is a well-known probiotic that can be found in a variety of foods. The strain Bacillus licheniformis MCC 2514 was previously characterized by our group for its bio-physiological capabilities establishing it as a promising probiotic, but information on the genetic evidence for its attributes was lacking. In the current study, whole genome analysis identified the underlying molecular determinants responsible for its probiotic potential. The circular genome of MCC 2514 was 4,230,480 bp with 46.2% GC content, 24 rRNA, and 83 tRNA genes. 
The pangenome analysis between B. licheniformis MCC 2514 and 12 other B. licheniformis strains revealed a pangenome of 6008 genes and core genome of 3775 genes. Genome mining revealed NRPS and bacteriocins producing gene clusters indicating its biocontrol properties. Several genes encoding carbohydrate degrading enzymes, which aid in proper food degradation in the intestine, were also observed. Stress tolerance, vitamin, and essential amino acids biosynthesis related genes were found, which are important characteristics of a probiotic strain. Additionally, vital genes responsible for gut adhesion and biofilm formation were observed in its genome. The bacterium has been shown to improve the shelf life of idli batter by preventing whey separation, CO2, and odour production while maintaining the pH of 3.96-4.29, especially at cold temperatures. It has significantly reduced coliform contamination at both room and low temperatures, demonstrating its bio-preservative ability, which is also corroborated by the presence of the NRPS and bacteriocin gene clusters in its genome. The present study helped to understand both, the ability of B. 
licheniformis MCC 2514 to adapt the intestinal gut environment and its probiotic functionality for food preservation.}, } @article {pmid35863000, year = {2022}, author = {Wang, Z and Guo, G and Li, Q and Li, P and Li, M and Zhou, L and Tan, Z and Zhang, W}, title = {Combing Immunoinformatics with Pangenome Analysis To Design a Multiepitope Subunit Vaccine against Klebsiella pneumoniae K1, K2, K47, and K64.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0114822}, pmid = {35863000}, issn = {2165-0497}, mesh = {Anti-Bacterial Agents/therapeutic use ; Carbapenems/therapeutic use ; *Cross Infection/drug therapy ; Humans ; *Klebsiella Infections/drug therapy/epidemiology/prevention & control ; Klebsiella pneumoniae/genetics ; Molecular Docking Simulation ; Vaccines, Subunit/genetics/therapeutic use ; }, abstract = {Klebsiella pneumoniae is an opportunistic Gram-negative bacterium that has become a leading causative agent of nosocomial infections, mainly infecting patients with immunosuppressive diseases. Capsular (K) serotypes K1, K2, K47, and K64 are commonly associated with higher virulence (hypervirulent Klebsiella pneumoniae), and more threateningly, isolates belonging to the last two K serotypes are also frequently associated with resistance to carbapenem (hypervirulent carbapenem-resistant Klebsiella pneumoniae). The prevalence of these isolates has posed significant threats to human health, and there are no appropriate therapies available against them. Therefore, in this study, a method combining immunoinformatics and pangenome analysis was applied for contriving a multiepitope subunit vaccine against these four threatening serotypes. To obtain cross-protection, 12 predicted conserved antigens were screened from the core genome of 274 complete Klebsiella pneumoniae genomes (KL1, KL2, KL47, and KL64), from which the epitopes of T and B cells were extracted for vaccine construction. 
In addition, the immunological properties, the interaction with Toll-like receptors, and the stability in a simulative humoral environment were evaluated by immunoinformatics methods, molecular docking, and molecular dynamics simulation. All of these evaluations indicated the potency of this constructed vaccine to be an effective therapeutic agent. Lastly, the cDNA of the designed vaccine was optimized and ligated to pET-28a(+) for expression vector construction. Overall, our research provides a newly cross-protective control strategy against these troublesome pathogens and paves the way for the development of a safe and effective vaccine. IMPORTANCE: Klebsiella pneumoniae is an opportunistic Gram-negative bacterium that has become a leading causative agent of nosocomial infections. Among the numerous capsular serotypes, K1, K2, K47, and K64 are commonly associated with higher virulence (hypervirulent K. pneumoniae). More threateningly, the last two serotypes are frequently associated with resistance to carbapenem (hypervirulent carbapenem-resistant K. pneumoniae). However, there is currently no therapeutic agent or vaccine specifically against these isolates. Therefore, development of a vaccine against these pathogens is very essential. In this study, for the first time, a method combining pangenome analysis, reverse vaccinology, and immunoinformatics was applied for contriving a multiepitope subunit vaccine against K. pneumoniae isolates of K1, K2, K47, and K64. Also, the immunological properties of the constructed vaccine were evaluated and its high potency was revealed. Overall, our research will pave the way for the vaccine development against these four threatening capsular serotypes of K. 
pneumoniae.}, } @article {pmid35862809, year = {2022}, author = {Sassi, M and Bronsard, J and Pascreau, G and Emily, M and Donnio, PY and Revest, M and Felden, B and Wirth, T and Augagneur, Y}, title = {Forecasting Staphylococcus aureus Infections Using Genome-Wide Association Studies, Machine Learning, and Transcriptomic Approaches.}, journal = {mSystems}, volume = {7}, number = {4}, pages = {e0037822}, pmid = {35862809}, issn = {2379-5077}, mesh = {Animals ; Humans ; Staphylococcus aureus/genetics ; Genome-Wide Association Study ; Transcriptome ; *Staphylococcal Infections/diagnosis ; RNA ; *Bacteremia/microbiology ; Machine Learning ; }, abstract = {Staphylococcus aureus is a major human and animal pathogen, colonizing diverse ecological niches within its hosts. Predicting whether an isolate will infect a specific host and its subsequent clinical fate remains unknown. In this study, we investigated the S. aureus pangenome using a curated set of 356 strains, spanning a wide range of hosts, origins, and clinical display and antibiotic resistance profiles. We used genome-wide association study (GWAS) and random forest (RF) algorithms to discriminate strains based on their origins and clinical sources. Here, we show that the presence of sak and scn can discriminate strains based on their host specificity, while other genes such as mecA are often associated with virulent outcomes. Both GWAS and RF indicated the importance of intergenic regions (IGRs) and coding DNA sequence (CDS) but not sRNAs in forecasting an outcome. Additional transcriptomic analyses performed on the most prevalent clonal complex 8 (CC8) clonal types, in media mimicking nasal colonization or bacteremia, indicated three RNAs as potential RNA markers to forecast infection, followed by 30 others that could serve as infection severity predictors. 
Our report shows that genetic association and transcriptomics are complementary approaches that will be combined in a single analytical framework to improve our understanding of bacterial pathogenesis and ultimately identify potential predictive molecular markers. IMPORTANCE: Predicting the outcome of bacterial colonization and infections, based on extensive genomic and transcriptomic data from a given pathogen, would be of substantial help for clinicians in treating and curing patients. In this report, genome-wide association studies and random forest algorithms have defined gene combinations that differentiate human from animal strains, colonization from diseases, and nonsevere from severe diseases, while it revealed the importance of IGRs and CDS, but not small RNAs (sRNAs), in anticipating an outcome. In addition, transcriptomic analyses performed on the most prevalent clonal types, in media mimicking either nasal colonization or bacteremia, revealed significant differences and therefore potent RNA markers. Overall, the use of both genomic and transcriptomic data in a single analytical framework can enhance our understanding of bacterial pathogenesis.}, } @article {pmid35862683, year = {2022}, author = {Baseggio, L and Rudenko, O and Engelstädter, J and Barnes, AC}, title = {The Evolution of a Specialized, Highly Virulent Fish Pathogen through Gene Loss and Acquisition of Host-Specific Survival Mechanisms.}, journal = {Applied and environmental microbiology}, volume = {88}, number = {14}, pages = {e0022222}, pmid = {35862683}, issn = {1098-5336}, mesh = {Animals ; *Fish Diseases/microbiology ; Fishes/microbiology ; *Gram-Negative Bacterial Infections/microbiology ; Photobacterium/metabolism ; Phylogeny ; }, abstract = {Photobacterium damselae comprises two subspecies, P. damselae subsp. damselae and P. damselae subsp. piscicida, that contrast remarkably despite their taxonomic relationship. 
The former is opportunistic and free-living but can cause disease in compromised individuals from a broad diversity of taxa, while the latter is a highly specialized, primary fish pathogen. Here, we employ new closed curated genome assemblies from Australia to estimate the global phylogenetic structure of the species P. damselae. We identify genes responsible for the shift from an opportunist to a host-adapted fish pathogen, potentially via an arthropod vector as fish-to-fish transmission was not achieved in repeated cohabitation challenges despite high virulence for Seriola lalandi. Acquisition of ShdA adhesin and of thiol peroxidase may have allowed the environmental, generalist ancestor to colonize zooplankton and to occasionally enter in fish host sentinel cells. As dependence on the host has increased, P. damselae has lost nonessential genes, such as those related to nitrite and sulfite reduction, urea degradation, a type 6 secretion system (T6SS) and several toxin-antitoxin (TA) systems. Similar to the evolution of Yersinia pestis, the loss of urease may be the crucial event that allowed the pathogen to stably colonize zooplankton vectors. Acquisition of host-specific genes, such as those required to form a sialic acid capsule, was likely necessary for the emergent P. damselae subsp. piscicida to become a highly specialized, facultative intracellular fish pathogen. Processes that have shaped P. damselae subsp. piscicida from subsp. damselae are similar to those underlying evolution of Yersinia pestis from Y. pseudotuberculosis. IMPORTANCE: Photobacterium damselae subsp. damselae is a ubiquitous marine bacterium and opportunistic pathogen of compromised hosts of diverse taxa. In contrast, its sister subspecies P. damselae subsp. piscicida (Pdp) is highly virulent in fish. Pdp has evolved from a single subclade of Pdd through gene loss and acquisition. 
We show that fish-to-fish transmission does not occur in repeated infection models in the primary host, Seriola lalandi, and present genomic evidence for vector-borne transmission, potentially via zooplankton. The broad genomic changes from generalist Pdd to specialist Pdp parallel those of the environmental opportunist Yersinia pseudotuberculosis to vector-borne plague bacterium Y. pestis and demonstrate that evolutionary processes in bacterial pathogens are universal between the terrestrial and marine biosphere.}, } @article {pmid35861394, year = {2022}, author = {Jonkheer, EM and van Workum, DM and Sheikhizadeh Anari, S and Brankovics, B and de Haan, JR and Berke, L and van der Lee, TAJ and de Ridder, D and Smit, S}, title = {PanTools v3: functional annotation, classification and phylogenomics.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {18}, pages = {4403--4405}, pmid = {35861394}, issn = {1367-4811}, support = {TU 16022//Dutch Ministry of Economic Affairs in the Topsector Program 'Horticulture and Starting Materials'/ ; }, mesh = {Humans ; Phylogeny ; *SARS-CoV-2/genetics ; *COVID-19 ; Software ; Genome, Viral ; }, abstract = {SUMMARY: The ever-increasing number of sequenced genomes necessitates the development of pangenomic approaches for comparative genomics. Introduced in 2016, PanTools is a platform that allows pangenome construction, homology grouping and pangenomic read mapping. The use of graph database technology makes PanTools versatile, applicable from small viral genomes like SARS-CoV-2 up to large plant or animal genomes like tomato or human. Here, we present our third major update to PanTools that enables the integration of functional annotations and provides both gene-level analyses and phylogenetics.

AVAILABILITY AND IMPLEMENTATION: PanTools is implemented in Java 8 and released under the GNU GPLv3 license. Software and documentation are available at https://git.wur.nl/bioinformatics/pantools.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35860541, year = {2022}, author = {Hussain, B and Akpınar, BA and Alaux, M and Algharib, AM and Sehgal, D and Ali, Z and Aradottir, GI and Batley, J and Bellec, A and Bentley, AR and Cagirici, HB and Cattivelli, L and Choulet, F and Cockram, J and Desiderio, F and Devaux, P and Dogramaci, M and Dorado, G and Dreisigacker, S and Edwards, D and El-Hassouni, K and Eversole, K and Fahima, T and Figueroa, M and Gálvez, S and Gill, KS and Govta, L and Gul, A and Hensel, G and Hernandez, P and Crespo-Herrera, LA and Ibrahim, A and Kilian, B and Korzun, V and Krugman, T and Li, Y and Liu, S and Mahmoud, AF and Morgounov, A and Muslu, T and Naseer, F and Ordon, F and Paux, E and Perovic, D and Reddy, GVP and Reif, JC and Reynolds, M and Roychowdhury, R and Rudd, J and Sen, TZ and Sukumaran, S and Ozdemir, BS and Tiwari, VK and Ullah, N and Unver, T and Yazar, S and Appels, R and Budak, H}, title = {Capturing Wheat Phenotypes at the Genome Level.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {851079}, pmid = {35860541}, issn = {1664-462X}, abstract = {Recent technological advances in next-generation sequencing (NGS) technologies have dramatically reduced the cost of DNA sequencing, allowing species with large and complex genomes to be sequenced. Although bread wheat (Triticum aestivum L.) is one of the world's most important food crops, efficient exploitation of molecular marker-assisted breeding approaches has lagged behind that achieved in other crop species, due to its large polyploid genome. However, an international public-private effort spanning 9 years reported over 65% draft genome of bread wheat in 2014, and finally, after more than a decade culminated in the release of a gold-standard, fully annotated reference wheat-genome assembly in 2018. 
Shortly thereafter, in 2020, the genome of assemblies of additional 15 global wheat accessions was released. As a result, wheat has now entered into the pan-genomic era, where basic resources can be efficiently exploited. Wheat genotyping with a few hundred markers has been replaced by genotyping arrays, capable of characterizing hundreds of wheat lines, using thousands of markers, providing fast, relatively inexpensive, and reliable data for exploitation in wheat breeding. These advances have opened up new opportunities for marker-assisted selection (MAS) and genomic selection (GS) in wheat. Herein, we review the advances and perspectives in wheat genetics and genomics, with a focus on key traits, including grain yield, yield-related traits, end-use quality, and resistance to biotic and abiotic stresses. We also focus on reported candidate genes cloned and linked to traits of interest. Furthermore, we report on the improvement in the aforementioned quantitative traits, through the use of (i) clustered regularly interspaced short-palindromic repeats/CRISPR-associated protein 9 (CRISPR/Cas9)-mediated gene-editing and (ii) positional cloning methods, and of genomic selection. 
Finally, we examine the utilization of genomics for the next-generation wheat breeding, providing a practical example of using in silico bioinformatics tools that are based on the wheat reference-genome sequence.}, } @article {pmid35856711, year = {2022}, author = {Guitart-Matas, J and Gonzalez-Escalona, N and Maguire, M and Vilaró, A and Martinez-Urtaza, J and Fraile, L and Migura-Garcia, L}, title = {Revealing Genomic Insights of the Unexplored Porcine Pathogen Actinobacillus pleuropneumoniae Using Whole Genome Sequencing.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0118522}, pmid = {35856711}, issn = {2165-0497}, mesh = {*Actinobacillus Infections/microbiology/veterinary ; *Actinobacillus pleuropneumoniae/genetics ; Animals ; Genomics ; *Pleuropneumonia/microbiology/veterinary ; Serotyping ; Swine ; *Swine Diseases/microbiology ; Whole Genome Sequencing ; }, abstract = {Actinobacillus pleuropneumoniae (APP) is the causative agent of pleuropneumonia in pigs, one of the most relevant bacterial respiratory diseases in the swine industry. To date, 19 serotypes have been described based on capsular polysaccharide typing with significant virulence dissimilarities. In this study, 16 APP isolates from Spanish origin were selected to perform antimicrobial susceptibility tests and comparative genomic analysis using whole genome sequencing (WGS). To obtain a more comprehensive worldwide molecular epidemiologic analyses, all APP whole genome assemblies available at the National Center for Biotechnology Information (NCBI) at the time of the study were also included. An in-house in silico PCR approach enabled the correct serotyping of unserotyped or incorrectly serotyped isolates and allowed for the discrimination between serotypes 9 and 11. A pangenome analysis identified the presence or absence of gene clusters to be serotype specific, as well as virulence profile analyses targeting the apx operons. 
Antimicrobial resistance genes were correlated to the presence of specific plasmids. Altogether, this study provides new insights into the genetic variability within APP serotypes, correlates phenotypic tests with bioinformatic analyses and manifests the benefits of populated databases for a better assessment of diversity and variability of relatively unknown pathogens. Overall, genomic comparative analysis enhances the understanding of transmission and epidemiological patterns of this species and suggests vertical transmission of the pathogen, including the resistance genes, within the Spanish integrated systems. IMPORTANCE Pleuropneumonia is one of the most relevant respiratory infections in the swine industry. Despite Actinobacillus pleuropneumoniae (APP) being one of the most important pathogens in the pig production, this is the first comparative study including all available whole genome sequencing data from NCBI. Moreover, this study also includes 16 APP isolates of Spanish origin with known epidemiological relationships through vertical integrated systems. Genomic comparisons provided a deeper understanding of molecular and epidemiological knowledge between different APP serotypes. 
Furthermore, determination of resistance and toxin profiles allowed correlation with the presence of mobile genetic elements and specific serotype, respectively.}, } @article {pmid35856667, year = {2022}, author = {Babiker, A and Bower, C and Lutgring, JD and Petit, RA and Howard-Anderson, J and Ansari, U and McAllister, G and Adamczyk, M and Breaker, E and Satola, SW and Jacob, JT and Woodworth, MH}, title = {Clinical and Genomic Epidemiology of mcr-9-Carrying Carbapenem-Resistant Enterobacterales Isolates in Metropolitan Atlanta, 2012 to 2017.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0252221}, pmid = {35856667}, issn = {2165-0497}, support = {K23 AI144036/AI/NIAID NIH HHS/United States ; U54 CK000485/CK/NCEZID CDC HHS/United States ; U54CK000485/ACL/ACL HHS/United States ; }, mesh = {Anti-Bacterial Agents/pharmacology ; *Carbapenems/pharmacology ; *Colistin/pharmacology ; Drug Resistance, Bacterial/genetics ; Genomics ; Microbial Sensitivity Tests ; Phylogeny ; Plasmids ; }, abstract = {Colistin is a last-resort antibiotic for multidrug-resistant Gram-negative infections. Recently, the ninth allele of the mobile colistin resistance (mcr) gene family, designated mcr-9, was reported. However, its clinical and public health significance remains unclear. We queried genomes of carbapenem-resistant Enterobacterales (CRE) for mcr-9 from a convenience sample of clinical isolates collected between 2012 and 2017 through the Georgia Emerging Infections Program, a population- and laboratory-based surveillance program. Isolates underwent phenotypic characterization and whole-genome sequencing. Phenotypic characteristics, genomic features, and clinical outcomes of mcr-9-positive and -negative CRE cases were then compared. Among 235 sequenced CRE genomes, 13 (6%) were found to harbor mcr-9, all of which were Enterobacter cloacae complex. 
The median MIC and rates of heteroresistance and inducible resistance to colistin were similar between mcr-9-positive and -negative isolates. However, rates of resistance were higher among mcr-9-positive isolates across most antibiotic classes. All cases had significant health care exposures. The 90-day mortality was similarly high in both mcr-9-positive (31%) and -negative (7%) CRE cases. Nucleotide identity and phylogenetic analysis did not reveal geotemporal clustering. mcr-9-positive isolates had a significantly higher number of median [range] antimicrobial resistance (AMR) genes (16 [4 to 22] versus 6 [2 to 15]; P < 0.001) than did mcr-9-negative isolates. Pangenome tests confirmed a significant association of mcr-9 detection with mobile genetic element and heavy metal resistance genes. Overall, the presence of mcr-9 was not associated with significant changes in colistin resistance or clinical outcomes, but continued genomic surveillance to monitor for emergence of AMR genes is warranted. IMPORTANCE Colistin is a last-resort antibiotic for multidrug-resistant Gram-negative infections. A recently described allele of the mobile colistin resistance (mcr) gene family, designated mcr-9, has been widely reported among Enterobacterales species. However, its clinical and public health significance remains unclear. We compared characteristics and outcomes of mcr-9-positive and -negative CRE cases. All cases were acquired in the health care setting and associated with a high rate of mortality. The presence of mcr-9 was not associated with significant changes in colistin resistance, heteroresistance, or inducible resistance but was associated with resistance to other antimicrobials and antimicrobial resistance (AMR), virulence, and heavy metal resistance (HMR) genes. Overall, the presence of mcr-9 was not associated with significant phenotypic changes or clinical outcomes. 
However, given the increase in AMR and HMR gene content and potential clinical impact, continued genomic surveillance of multidrug-resistant organisms to monitor for emergence of AMR genes is warranted.}, } @article {pmid35851621, year = {2022}, author = {Wang, Y and Du, F and Wang, J and Wang, K and Tian, C and Qi, X and Lu, F and Liu, X and Ye, X and Jiao, Y}, title = {Improving bread wheat yield through modulating an unselected AP2/ERF gene.}, journal = {Nature plants}, volume = {8}, number = {8}, pages = {930-939}, pmid = {35851621}, issn = {2055-0278}, mesh = {*Bread ; Edible Grain/genetics ; Ethylenes ; Humans ; Plant Breeding ; Repressor Proteins ; *Triticum/genetics ; }, abstract = {Crop breeding heavily relies on natural genetic variation. However, additional new variations are desired to meet the increasing human demand. Inflorescence architecture determines grain number per spike, a major determinant of bread wheat (Triticum aestivum L.) yield. Here, using Brachypodium distachyon as a wheat proxy, we identified DUO-B1, encoding an APETALA2/ethylene response factor (AP2/ERF) transcription factor, regulating spike inflorescence architecture in bread wheat. Mutations of DUO-B1 lead to mild supernumerary spikelets, increased grain number per spike and, importantly, increased yield under field conditions without affecting other major agronomic traits. DUO-B1 suppresses cell division and promotes the expression of BH[t]/WFZP, whose mutations could lead to branched 'miracle-wheat'. 
Pan-genome analysis indicated that DUO-B1 has not been utilized in breeding, and holds promise to increase wheat yield further.}, } @article {pmid35841149, year = {2022}, author = {Liu, Y and Tian, Z}, title = {Super graph-based pan-genome: Bringing rice functional genomic study into a new dawn.}, journal = {Molecular plant}, volume = {15}, number = {9}, pages = {1409-1411}, doi = {10.1016/j.molp.2022.07.005}, pmid = {35841149}, issn = {1752-9867}, mesh = {Genome ; Genomics ; *Oryza/genetics ; }, } @article {pmid35838756, year = {2022}, author = {Ksiezarek, M and Grosso, F and Ribeiro, TG and Peixe, L}, title = {Genomic diversity of genus Limosilactobacillus.}, journal = {Microbial genomics}, volume = {8}, number = {7}, pages = {}, pmid = {35838756}, issn = {2057-5858}, mesh = {*Genome ; *Genomics ; Lactobacillaceae ; }, abstract = {The genus Limosilactobacillus (formerly Lactobacillus) contains multiple species considered to be adapted to vertebrates, yet their genomic diversity has not been explored. In this study, we performed comparative genomic analysis of Limosilactobacillus (22 species; 332 genomes) isolated from different niches, further focusing on human strains (11 species; 74 genomes) and their adaptation features to specific body sites. Phylogenomic analysis of Limosilactobacillus showed misidentification of some strains deposited in public databases and existence of putative novel Limosilactobacillus species. The pangenome analysis revealed a remarkable genomic diversity (only 1.3 % of gene clusters are shared), and we did not observe a strong association of the accessory genome with different niches. The pangenome of Limosilactobacillus reuteri and Limosilactobacillus fermentum was open, suggesting that acquisition of genes is still occurring. Although most Limosilactobacillus were predicted as antibiotic susceptible (83%), acquired antibiotic-resistance genes were common in L. reuteri from food-producing animals. 
Genes related to lactic acid isoform production (>95 %) and putative bacteriocins (70.2%) were identified in most Limosilactobacillus strains, while prophages (55.4%) and CRISPR-Cas systems (32.0%) were less prevalent. Among strains from human sources, several metabolic pathways were predicted as conserved and completed. Their accessory genome was highly variable and did not cluster according to different human body sites, with some exceptions (urogenital Limosilactobacillus vaginalis, Limosilactobacillus portuensis, Limosilactobacillus urinaemulieris and Limosilactobacillus coleohominis or gastrointestinal Limosilactobacillus mucosae). Moreover, we identified 12 Kyoto Encyclopedia of Genes and Genomes (KEGG) orthologues that were significantly enriched in strains from particular body sites. We concluded that evolution of the highly diverse Limosilactobacillus is complex and not always related to niche or human body site origin.}, } @article {pmid35832805, year = {2022}, author = {Zaidi, SE and Zaheer, R and Barbieri, R and Cook, SR and Hannon, SJ and Booker, CW and Church, D and Van Domselaar, G and Zovoilis, A and McAllister, TA}, title = {Genomic Characterization of Enterococcus hirae From Beef Cattle Feedlots and Associated Environmental Continuum.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {859990}, pmid = {35832805}, issn = {1664-302X}, abstract = {Enterococci are commensal bacteria of the gastrointestinal tract of humans, animals, and insects. They are also found in soil, water, and plant ecosystems. The presence of enterococci in human, animal, and environmental settings makes these bacteria ideal candidates to study antimicrobial resistance in the One-Health continuum. 
This study focused on Enterococcus hirae isolates (n = 4,601) predominantly isolated from beef production systems including bovine feces (n = 4,117, 89.5%), catch-basin water (n = 306, 6.65%), stockpiled bovine manure (n = 24, 0.5%), and natural water sources near feedlots (n = 145, 3.2%), and a few isolates from urban wastewater (n = 9, 0.2%) denoted as human-associated environmental samples. Antimicrobial susceptibility profiling of a subset (n = 1,319) of E. hirae isolates originating from beef production systems (n = 1,308) showed high resistance to tetracycline (65%) and erythromycin (57%) with 50.4% isolates harboring multi-drug resistance, whereas urban wastewater isolates (n = 9) were resistant to nitrofurantoin (44.5%) and tigecycline (44.5%) followed by linezolid (33.3%). Genes for tetracycline (tetL, M, S/M, and O/32/O) and macrolide resistance erm(B) were frequently found in beef production isolates. Antimicrobial resistance profiles of E. hirae isolates recovered from different environmental settings appeared to reflect the kind of antimicrobial usage in beef and human sectors. Comparative genomic analysis of E. hirae isolates showed an open pan-genome that consisted of 1,427 core genes, 358 soft core genes, 1701 shell genes, and 7,969 cloud genes. Across species comparative genomic analysis conducted on E. hirae, Enterococcus faecalis and Enterococcus faecium genomes revealed that E. hirae had unique genes associated with vitamin production, cellulose, and pectin degradation, traits which may support its adaptation to the bovine digestive tract. E. faecium and E. 
faecalis more frequently harbored virulence genes associated with biofilm formation, iron transport, and cell adhesion, suggesting niche specificity within these species.}, } @article {pmid35821092, year = {2022}, author = {Shang, L and Li, X and He, H and Yuan, Q and Song, Y and Wei, Z and Lin, H and Hu, M and Zhao, F and Zhang, C and Li, Y and Gao, H and Wang, T and Liu, X and Zhang, H and Zhang, Y and Cao, S and Yu, X and Zhang, B and Zhang, Y and Tan, Y and Qin, M and Ai, C and Yang, Y and Zhang, B and Hu, Z and Wang, H and Lv, Y and Wang, Y and Ma, J and Wang, Q and Lu, H and Wu, Z and Liu, S and Sun, Z and Zhang, H and Guo, L and Li, Z and Zhou, Y and Li, J and Zhu, Z and Xiong, G and Ruan, J and Qian, Q}, title = {A super pan-genomic landscape of rice.}, journal = {Cell research}, volume = {32}, number = {10}, pages = {878-896}, pmid = {35821092}, issn = {1748-7838}, mesh = {Domestication ; Genome, Plant ; Genomics ; Leucine/genetics ; Nucleotides ; *Oryza/genetics ; }, abstract = {Pan-genomes from large natural populations can capture genetic diversity and reveal genomic complexity. Using de novo long-read assembly, we generated a graph-based super pan-genome of rice consisting of a 251-accession panel comprising both cultivated and wild species of Asian and African rice. Our pan-genome reveals extensive structural variations (SVs) and gene presence/absence variations. Additionally, our pan-genome enables the accurate identification of nucleotide-binding leucine-rich repeat genes and characterization of their inter- and intraspecific diversity. Moreover, we uncovered grain weight-associated SVs which specify traits by affecting the expression of their nearby genes. We characterized genetic variants associated with submergence tolerance, seed shattering and plant architecture and found independent selection for a common set of genes that drove adaptation and domestication in Asian and African rice. 
This super pan-genome facilitates pinpointing of lineage-specific haplotypes for trait-associated genes and provides insights into the evolutionary events that have shaped the genomic architecture of various rice species.}, } @article {pmid35821091, year = {2022}, author = {Olsen, KM}, title = {The rice pangenome branches out.}, journal = {Cell research}, volume = {32}, number = {10}, pages = {867-868}, pmid = {35821091}, issn = {1748-7838}, mesh = {Genomics ; *Oryza/genetics ; }, } @article {pmid35819407, year = {2022}, author = {Li, LF and Xu, L and Li, WH and Sun, JQ}, title = {Sinomicrobium kalidii sp. nov., an indole-3-acetic acid-producing endophyte from a shoot of halophyte Kalidium cuspidatum.}, journal = {International journal of systematic and evolutionary microbiology}, volume = {72}, number = {7}, pages = {}, doi = {10.1099/ijsem.0.005452}, pmid = {35819407}, issn = {1466-5034}, mesh = {Bacterial Typing Techniques ; Base Composition ; *Chenopodiaceae ; DNA, Bacterial/genetics ; Endophytes/genetics ; Fatty Acids/chemistry ; Indoleacetic Acids ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Salt-Tolerant Plants ; Sequence Analysis, DNA ; Soil Microbiology ; }, abstract = {To better understand the effects of endophytic bacteria on halophytes, a bacteria that produced indole-3-acetic acid and 1-aminocyclopropane-1-carboxylic acid deaminase, designated HD2P242[T], was isolated from a shoot of Kalidium cuspidatum collected in Tumd Right Banner, Inner Mongolia, PR China. The cells of strain HD2P242[T] were Gram-stain-negative, strictly aerobic, motile by gliding, non-spore-forming and rod-shaped. Strain HD2P242[T] grew at pH 6.0-9.0 (optimum, pH 7.0) and 10-45 °C (optimum 37 °C), in the presence of 0-8 % (w/v) NaCl (optimum, 4 %). The strain was positive for oxidase and catalase. 
The phylogenetic trees based on the 16S rRNA gene sequences and the whole genome sequences both showed that strain HD2P242[T] clustered with Sinomicrobium pectinilyticum 5DNS001[T] and S. oceani SCSIO 03483[T], and had 95.6, 94.3 and <94.3 % 16S rRNA gene similarities to S. pectinilyticum 5DNS001[T], S. oceani SCSIO 03483[T] and all the other current type strains. Strain HD2P242[T] contained menaquinone 6 as its sole respiratory quinone. Its major polar lipids were phosphatidylethanolamine, two unidentified aminolipids, two unidentified phospholipids and an unidentified lipid. The major fatty acids were iso-C17 : 0, iso-C16 : 0 3-OH, anteiso-C17 : 0 and summed feature 6 (C19 : 1 ω9c and/or C19 : 1 ω11c). The genome consisted of a 5 364 211 bp circular chromosome, with a G+C content of 45.1 mol%, predicting 4391 coding sequence genes, 47 tRNA genes and two rRNA operons. The average nucleotide identity based on blast and the digital DNA-DNA hybridization values of strain HD2P242[T] with S. oceani SCSIO 03483[T] and S. pectinilyticum 5DNS001[T] were 73.8 and 77.0%, and 22.3 and 22.2%, respectively. The comparative genome analysis showed that the pan-genomes of strain HD2P242[T] and three Sinomicrobium type strains possessed 4236 clusters, whereas the core genome possessed 2162 clusters, which accounted for 52.3 % of all the clusters. The genomic analysis revealed that all four Sinomicrobium members could utilize d-glucose by the glycolysis-gluconeogenesis pathway or the pentose phosphate pathway. The tricarboxylic acid cycle was utilized as a metabolic centre. The phylogenetic, physiological and phenotypic characteristics allowed the discrimination of strain HD2P242[T] from its phylogenetic relatives. Therefore, Sinomicrobium kalidii sp. nov. 
is proposed, and the type strain is HD2P242[T] (=CGMCC 1.19025[T]=KCTC 92136[T]).}, } @article {pmid35818004, year = {2022}, author = {Contreras-Moreira, B and Del Río, ÁR and Cantalapiedra, CP and Sancho, R and Vinuesa, P}, title = {Pangenome Analysis of Plant Transcripts and Coding Sequences.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2512}, number = {}, pages = {121-152}, doi = {10.1007/978-1-0716-2429-6_9}, pmid = {35818004}, issn = {1940-6029}, mesh = {Humans ; Phylogeny ; *Software ; }, abstract = {The pangenome of a species is the sum of the genomes of its individuals. As coding sequences often represent only a small fraction of each genome, analyzing the pangene set can be a cost-effective strategy for plants with large genomes or highly heterozygous species. Here, we describe a step-by-step protocol to analyze plant pangene sets with the software GET_HOMOLOGUES-EST. After a short introduction, where the main concepts are illustrated, the remaining sections cover the installation and typical operations required to analyze and annotate pantranscriptomes and gene sets of plants. The recipes include instructions on how to call core and accessory genes, how to compute a presence-absence pangenome matrix, and how to identify and analyze private genes, present only in some genotypes. 
Downstream phylogenetic analyses are also discussed.}, } @article {pmid35818000, year = {2022}, author = {Tay Fernandez, CG and Marsh, JI and Nestor, BJ and Gill, M and Golicz, AA and Bayer, PE and Edwards, D}, title = {An SGSGeneloss-Based Method for Constructing a Gene Presence-Absence Table Using Mosdepth.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2512}, number = {}, pages = {73-80}, doi = {10.1007/978-1-0716-2429-6_5}, pmid = {35818000}, issn = {1940-6029}, mesh = {*Genome ; *Genomics/methods ; High-Throughput Nucleotide Sequencing/methods ; Humans ; Sequence Analysis, DNA/methods ; }, abstract = {Presence-absence variants (PAV) are genomic regions present in some individuals of a species, but not others. PAVs have been shown to contribute to genomic diversity, especially in bacteria and plants. These structural variations have been linked to traits and can be used to track a species' evolutionary history. PAVs are usually called by aligning short read sequence data from one or more individuals to a reference genome or pangenome assembly, and then comparing coverage. Regions where reads do not align define absence in that individual, and the regions are classified as PAVs. The method below details how to align sequence reads to a reference and how to use the sequencing-coverage calculator Mosdepth to identify PAVs and construct a PAV table for use in downstream comparative genome analysis.}, } @article {pmid35817620, year = {2022}, author = {Garg, S and Balboa, R and Kuja, J}, title = {Chromosome-scale haplotype-resolved pangenomics.}, journal = {Trends in genetics : TIG}, volume = {38}, number = {11}, pages = {1103-1107}, doi = {10.1016/j.tig.2022.06.011}, pmid = {35817620}, issn = {0168-9525}, mesh = {*Chromosomes/genetics ; Haplotypes/genetics ; Humans ; }, abstract = {Complete pangenomics is crucial for understanding genetic diversity and evolution across the tree of life. 
Chromosome-scale, haplotype-resolved pangenomics allows complex structural variations, long-range interactions, and associated functions to be discerned in species populations. We explore the need for high-resolution pangenomes, discuss computational strategies for their development, and describe applications in biodiversity and human health.}, } @article {pmid35815569, year = {2022}, author = {González-Díaz, A and Berbel, D and Ercibengoa, M and Cercenado, E and Larrosa, N and Quesada, MD and Casabella, A and Cubero, M and Marimón, JM and Domínguez, MÁ and Carrera-Salinas, A and Càmara, J and Martín-Galiano, AJ and Yuste, J and Martí, S and Ardanuy, C}, title = {Genomic features of predominant non-PCV13 serotypes responsible for adult invasive pneumococcal disease in Spain.}, journal = {The Journal of antimicrobial chemotherapy}, volume = {77}, number = {9}, pages = {2389-2398}, doi = {10.1093/jac/dkac199}, pmid = {35815569}, issn = {1460-2091}, support = {//Instituto de Salud Carlos III/ ; //European Regional Development Fund/European Social Fund/ ; //Amazon Web Services/ ; //ISCIII/ ; //AWS/ ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Genomics ; Humans ; Penicillins ; *Pneumococcal Infections/epidemiology/prevention & control ; Pneumococcal Vaccines ; Serogroup ; Spain/epidemiology ; }, abstract = {BACKGROUND: Although pneumococcal conjugate vaccines (PCVs) effectively prevent invasive pneumococcal disease (IPD), serotype replacement has occurred.

OBJECTIVES: We studied the pangenome, antibiotic resistance mechanisms and presence of mobile elements in predominant non-PCV13 serotypes causing adult IPD after PCV13 vaccine introduction in Spain.

METHODS: We conducted a multicentre study comparing three periods in six Spanish hospitals and analysed through whole genome sequencing representative strains collected in the pre-PCV13, early-PCV13 and late-PCV13 periods.

RESULTS: Among 2197 cases of adult IPD identified, 110 pneumococci expressing non-PCV13 capsules were sequenced. Seven predominant serotypes accounted for 42.6% of IPD episodes in the late-PCV13 period: serotypes 8 (14.4%), 12F (7.5%), 9N (5.2%), 11A (4.1%), 22F (3.9%), 24F (3.9%) and 16F (3.6%). All predominant non-PCV13 serotypes were highly clonal, comprising one or two clonal complexes (CC). In general, CC53_8, CC404_8, CC30_16F, CC433_22F and CC66_9N, related to predominant non-PCV13 serotypes, were antibiotic susceptible. CC156_11A was associated with resistance to co-trimoxazole, penicillin and amoxicillin. CC230_24F was non-susceptible to penicillin and resistant to erythromycin, clindamycin, and tetracycline. Six composite transposon structures of the Tn5252-family were found in CC230_24F, CC989_12F and CC30_16F carrying different combinations of erm(B), tet(M), and cat. Pangenome analysis revealed differences in accessory genomes among the different CC, with most variety in CC30_16F (23.9%) and more conservation in CC156_11A (8.5%).

CONCLUSIONS: We identified highly clonal predominant serotypes responsible for IPD in adults. The detection of not only conjugative elements carrying resistance determinants but also clones previously associated with vaccine serotypes (CC156_11A and CC230_24F) highlights the importance of the accessory genome.}, } @article {pmid35809555, year = {2022}, author = {Yang, T and Gao, F}, title = {High-quality pan-genome of Escherichia coli generated by excluding confounding and highly similar strains reveals an association between unique gene clusters and genomic islands.}, journal = {Briefings in bioinformatics}, volume = {23}, number = {4}, pages = {}, doi = {10.1093/bib/bbac283}, pmid = {35809555}, issn = {1477-4054}, mesh = {*Escherichia coli/genetics ; Genome, Bacterial ; *Genomic Islands ; Multigene Family ; Phylogeny ; }, abstract = {The pan-genome analysis of bacteria provides detailed insight into the diversity and evolution of a bacterial population. However, the genomes involved in the pan-genome analysis should be checked carefully, as the inclusion of confounding strains would have unfavorable effects on the identification of core genes, and the highly similar strains could bias the results of the pan-genome state (open versus closed). In this study, we found that the inclusion of highly similar strains also affects the results of unique genes in pan-genome analysis, which leads to a significant underestimation of the number of unique genes in the pan-genome. Therefore, these strains should be excluded from pan-genome analysis at the early stage of data processing. Currently, tens of thousands of genomes have been sequenced for Escherichia coli, which provides an unprecedented opportunity as well as a challenge for pan-genome analysis of this classical model organism. Using the proposed strategies, a high-quality E. 
coli pan-genome was obtained, and the unique genes was extracted and analyzed, revealing an association between the unique gene clusters and genomic islands from a pan-genome perspective, which may facilitate the identification of genomic islands.}, } @article {pmid35805383, year = {2022}, author = {Alshammari, A and Alharbi, M and Alghamdi, A and Alharbi, SA and Ashfaq, UA and Tahir Ul Qamar, M and Ullah, A and Irfan, M and Khan, A and Ahmad, S}, title = {Computer-Aided Multi-Epitope Vaccine Design against Enterobacter xiangfangensis.}, journal = {International journal of environmental research and public health}, volume = {19}, number = {13}, pages = {}, pmid = {35805383}, issn = {1660-4601}, mesh = {Anti-Bacterial Agents ; Computational Biology ; Computers ; Enterobacter ; Epitopes/chemistry ; Molecular Docking Simulation ; *Proteome ; *Vaccines ; }, abstract = {Antibiotic resistance is a global public health threat and is associated with high mortality due to antibiotics' inability to treat bacterial infections. Enterobacter xiangfangensis is an emerging antibiotic-resistant bacterial pathogen from the Enterobacter genus and has the ability to acquire resistance to multiple antibiotic classes. Currently, there is no effective vaccine against Enterobacter species. In this study, a chimeric vaccine is designed comprising different epitopes screened from E. xiangfangensis proteomes using immunoinformatic and bioinformatic approaches. In the first phase, six fully sequenced proteomes were investigated by bacterial pan-genome analysis, which revealed that the pathogen consists of 21,996 core proteins, 3785 non-redundant proteins and 18,211 redundant proteins. The non-redundant proteins were considered for the vaccine target prioritization phase where different vaccine filters were applied. By doing so, two proteins; ferrichrome porin (FhuA) and peptidoglycan-associated lipoprotein (Pal) were shortlisted for epitope prediction. 
Based on properties of antigenicity, allergenicity, water solubility and DRB*0101 binding ability, three epitopes (GPAPTIAAKR, ATKTDTPIEK and RNNGTTAEI) were used in multi-epitope vaccine designing. The designed vaccine construct was analyzed in a docking study with immune cell receptors, which predicted the vaccine's proper binding with said receptors. Molecular dynamics analysis revealed that the vaccine demonstrated stable binding dynamics, and binding free energy calculations further validated the docking results. In conclusion, these in silico results may help experimentalists in developing a vaccine against E. xiangfangensis in specific and Enterobacter in general.}, } @article {pmid35799353, year = {2022}, author = {Dabbaghie, F and Ebler, J and Marschall, T}, title = {BubbleGun: enumerating bubbles and superbubbles in genome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {17}, pages = {4217-4219}, pmid = {35799353}, issn = {1367-4811}, support = {//German Federal Ministry for Research and Education/ ; 031L0184A//BMBF/ ; }, mesh = {Humans ; Sequence Analysis, DNA/methods ; *Software ; *Algorithms ; Genome ; Genomics/methods ; }, abstract = {MOTIVATION: With the fast development of sequencing technology, accurate de novo genome assembly is now possible even for larger genomes. Graph-based representations of genomes arise both as part of the assembly process, but also in the context of pangenomes representing a population. In both cases, polymorphic loci lead to bubble structures in such graphs. Detecting bubbles is hence an important task when working with genomic variants in the context of genome graphs.

RESULTS: Here, we present a fast general-purpose tool, called BubbleGun, for detecting bubbles and superbubbles in genome graphs. Furthermore, BubbleGun detects and outputs runs of linearly connected bubbles and superbubbles, which we call bubble chains. We showcase its utility on de Bruijn graphs and compare our results to vg's snarl detection. We show that BubbleGun is considerably faster than vg especially in bigger graphs, where it reports all bubbles in less than 30 min on a human sample de Bruijn graph of around 2 million nodes.

AVAILABILITY AND IMPLEMENTATION: BubbleGun is available and documented as a Python3 package at https://github.com/fawaz-dabbaghieh/bubble_gun under MIT license.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35799261, year = {2022}, author = {Vezina, B and Rosa, MN and Canu, A and Tola, S}, title = {Genomic surveillance reveals antibiotic resistance gene transmission via phage recombinases within sheep mastitis-associated Streptococcus uberis.}, journal = {BMC veterinary research}, volume = {18}, number = {1}, pages = {264}, pmid = {35799261}, issn = {1746-6148}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; *Bacteriophages ; Cattle ; *Cattle Diseases ; Drug Resistance, Microbial ; Female ; Genomics ; *Mastitis, Bovine/epidemiology ; Multilocus Sequence Typing/veterinary ; Recombinases ; Sheep ; *Sheep Diseases/epidemiology ; *Streptococcal Infections/epidemiology/veterinary ; Streptococcus ; }, abstract = {BACKGROUND: Streptococcus uberis is one of the main causative agents of ovine mastitis, however little is known about this global, environmental pathogen and its genomic mechanisms of disease. In this study, we performed genomic analysis on 46 S. uberis isolates collected from mastitis-infected sheep in Sardinia (Italy).

RESULTS: Genomes were assigned into lineage clusters using PopPUNK, which found 27 distinct isolate clusters, indicating considerable genetic variability consistent with environmental isolates. Geographic trends were identified including regional linkage of several isolate clusters. Multi-locus Sequence Typing (MLST) performed poorly and provided no new insights. Genomes were then screened for antimicrobial resistance genes, which were compared to phenotypic resistance profiles. Isolates showed consistent phenotypic resistance to aminoglycosides with variable resistance to novobiocin and tetracycline. In general, identification of antimicrobial resistance genes did not correlate with phenotypic resistance profiles, indicating unknown genetic determinants. A multi-antimicrobial resistance cassette (aminoglycoside, lincosamide and streptogramin) was identified in the chromosome of three genomes, flanked by vestigial phage recombinases. This locus appears to have spread horizontally within discrete S. uberis populations within a 40 km radius (Sassari region). Genomes were screened for putative virulence factors, which identified 16 genes conserved between sheep and cow isolates, with no host-specific genes shared uniformly across all host-specific isolates. Pangenomic analysis was then performed to identify core genes which were putatively surface-exposed, for identification of potential vaccine targets. As all genomes encoded sortase, core genes were screened for the sortase cleavage motif. Of the 1445 core S. uberis genes, 64 were putative sortase substrates and were predominantly adhesins, permeases and peptidases, consistent with compounds found within ruminant milk such as xanthine, fibronectin and lactoferrin.

CONCLUSIONS: This study demonstrated the importance of whole genome sequencing for surveillance of S. uberis and tracking horizontal acquisition of antimicrobial resistance genes, as well as providing insight into genetic determinants of disease, which cannot be inferred from the MLST schemes. Future mastitis surveillance should be informed by genomic analysis.}, } @article {pmid35795189, year = {2022}, author = {Saraiva, MMS and Benevides, VP and da Silva, NMV and Varani, AM and de Freitas Neto, OC and Berchieri, Â and Delgado-Suárez, EJ and Rocha, ADL and Eguale, T and Munyalo, JA and Kariuki, S and Gebreyes, WA and de Oliveira, CJB}, title = {Genomic and Evolutionary Analysis of Salmonella enterica Serovar Kentucky Sequence Type 198 Isolated From Livestock In East Africa.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {772829}, pmid = {35795189}, issn = {2235-2988}, mesh = {Animals ; Anti-Bacterial Agents/pharmacology ; Ciprofloxacin ; Drug Resistance, Multiple, Bacterial/genetics ; Genomics ; Kentucky ; Livestock ; Phylogeny ; *Salmonella enterica/genetics ; Serogroup ; Streptomycin ; }, abstract = {Since its emergence in the beginning of the 90's, multidrug-resistant (MDR) Salmonella enterica subsp. enterica serovar Kentucky has become a significant public health problem, especially in East Africa. This study aimed to investigate the antimicrobial resistance profile and the genotypic relatedness of Salmonella Kentucky isolated from animal sources in Ethiopia and Kenya (n=19). We also investigated population evolutionary dynamics through phylogenetic and pangenome analyses with additional publicly available Salmonella Kentucky ST198 genomes (n=229). All the 19 sequenced Salmonella Kentucky isolates were identified as ST198. 
Among these isolates, the predominant genotypic antimicrobial resistance profile observed in ten (59.7%) isolates included the aac(3)-Id, aadA7, strA-strB, blaTEM-1B, sul1, and tet(A) genes, which mediated resistance to gentamicin, streptomycin/spectinomycin, streptomycin, ampicillin, sulfamethoxazole and tetracycline, respectively; and gyrA and parC mutations associated to ciprofloxacin resistance. Four isolates harbored plasmid types Incl1 and/or Col8282; two of them carried both plasmids. Salmonella Pathogenicity islands (SPI-1 to SPI-5) were highly conserved in the 19 sequenced Salmonella Kentucky isolates. Moreover, at least one Pathogenicity Island (SPI 1-4, SPI 9 or C63PI) was identified among the 229 public Salmonella Kentucky genomes. The phylogenetic analysis revealed that almost all Salmonella Kentucky ST198 isolates (17/19) stemmed from a single strain that has accumulated ciprofloxacin resistance-mediating mutations. A total of 8,104 different genes were identified in a heterogenic and still open Salmonella Kentucky ST198 pangenome. 
Considering the virulence factors and antimicrobial resistance genes detected in Salmonella Kentucky, the implications of this pathogen to public health and the epidemiological drivers for its dissemination must be investigated.}, } @article {pmid35779281, year = {2022}, author = {Garg, G and Kamphuis, LG and Bayer, PE and Kaur, P and Dudchenko, O and Taylor, CM and Frick, KM and Foley, RC and Gao, LL and Aiden, EL and Edwards, D and Singh, KB}, title = {A pan-genome and chromosome-length reference genome of narrow-leafed lupin (Lupinus angustifolius) reveals genomic diversity and insights into key industry and biological traits.}, journal = {The Plant journal : for cell and molecular biology}, volume = {111}, number = {5}, pages = {1252-1266}, pmid = {35779281}, issn = {1365-313X}, support = {UM1 HG009375/HG/NHGRI NIH HHS/United States ; RM1 HG011016/HG/NHGRI NIH HHS/United States ; }, mesh = {Australia ; Chromosomes ; Genomics ; Humans ; *Lupinus/genetics ; Plant Breeding ; }, abstract = {Narrow-leafed lupin (NLL; Lupinus angustifolius) is a key rotational crop for sustainable farming systems, whose grain is high in protein content. It is a gluten-free, non-genetically modified, alternative protein source to soybean (Glycine max) and as such has gained interest as a human food ingredient. Here, we present a chromosome-length reference genome for the species and a pan-genome assembly comprising 55 NLL lines, including Australian and European cultivars, breeding lines and wild accessions. We present the core and variable genes for the species and report on the absence of essential mycorrhizal associated genes. The genome and pan-genomes of NLL and its close relative white lupin (Lupinus albus) are compared. Furthermore, we provide additional evidence supporting LaRAP2-7 as the key alkaloid regulatory gene for NLL and demonstrate the NLL genome is underrepresented in classical NLR disease resistance genes compared to other sequenced legume species. 
The NLL genomic resources generated here coupled with previously generated RNA sequencing datasets provide new opportunities to fast-track lupin crop improvement.}, } @article {pmid35763423, year = {2022}, author = {Ceres, KM and Stanhope, MJ and Gröhn, YT}, title = {A critical evaluation of Mycobacterium bovis pangenomics, with reference to its utility in outbreak investigation.}, journal = {Microbial genomics}, volume = {8}, number = {6}, pages = {}, pmid = {35763423}, issn = {2057-5858}, mesh = {Animals ; Cattle ; Disease Outbreaks ; *Mycobacterium bovis/genetics ; *Tuberculosis, Bovine/microbiology ; }, abstract = {The increased accessibility of next generation sequencing has allowed enough genomes from a given bacterial species to be sequenced to describe the distribution of genes in the pangenome, without limiting analyses to genes present in reference strains. Although some taxa have thousands of whole genome sequences available on public databases, most genomes were sequenced with short read technology, resulting in incomplete assemblies. Studying pangenomes could lead to important insights into adaptation, pathogenicity, or molecular epidemiology, however given the known information loss inherent in analyzing contig-level assemblies, these inferences may be biased or inaccurate. In this study we describe the pangenome of a clonally evolving pathogen, Mycobacterium bovis, and examine the utility of gene content variation in M. bovis outbreak investigation. We constructed the M. bovis pangenome using 1463 de novo assembled genomes. We tested the assumption of strict clonal evolution by studying evidence of recombination in core genes and analyzing the distribution of accessory genes among core monophyletic groups. To determine if gene content variation could be utilized in outbreak investigation, we carefully examined accessory genes detected in a well described M. bovis outbreak in Minnesota. We found significant errors in accessory gene classification. 
After accounting for these errors, we show that M. bovis has a much smaller accessory genome than previously described and provide evidence supporting ongoing clonal evolution and a closed pangenome, with little gene content variation generated over outbreaks. We also identified frameshift mutations in multiple genes, including a mutation in glpK, which has recently been associated with antibiotic tolerance in Mycobacterium tuberculosis. A pangenomic approach enables a more comprehensive analysis of genome dynamics than is possible with reference-based approaches; however, without critical evaluation of accessory gene content, inferences of transmission patterns employing these loci could be misguided.}, } @article {pmid35758593, year = {2022}, author = {Sang, J and Zhuang, D and Zhang, T and Wu, Q and Yu, J and Zhang, Z}, title = {Convergent and Divergent Age Patterning of Gut Microbiota Diversity in Humans and Nonhuman Primates.}, journal = {mSystems}, volume = {7}, number = {4}, pages = {e0151221}, pmid = {35758593}, issn = {2379-5077}, mesh = {Animals ; Adult ; Humans ; *Gastrointestinal Microbiome/genetics ; Macaca mulatta/genetics ; RNA, Ribosomal, 16S/genetics ; Hydroxyproline ; Leucine ; }, abstract = {The gut microbiome has significant effects on healthy aging and aging-related diseases, whether in humans or nonhuman primates. However, little is known about the divergence and convergence of gut microbial diversity between humans and nonhuman primates during aging, which limits their applicability for studying the gut microbiome's role in human health and aging. Here, we performed 16S rRNA gene sequencing analysis for captive rhesus macaques (Macaca mulatta) and compared this data set with other freely available gut microbial data sets containing four human populations (Chinese, Japanese, Italian, and British) and two nonhuman primates (wild lemurs [Lemur catta] and wild chimpanzees [Pan troglodytes]). 
Based on the consistent V4 region of the 16S rRNA gene, beta diversity analysis suggested significantly separated gut microbial communities associated with host backgrounds of seven host groups, but within each group, significant gut microbial divergences were observed, and indicator bacterial genera were identified as associated with aging. We further discovered six common anti-inflammatory gut bacteria (Prevotellamassilia, Prevotella, Gemmiger, Coprococcus, Faecalibacterium, and Roseburia) that had butyrate-producing potentials suggested by pangenomic analysis and that showed similar dynamic changes in at least two selected host groups during aging, independent of distinct host backgrounds. Finally, we found striking age-related changes in 66 plasma metabolites in macaques. Two highly changed metabolites, hydroxyproline and leucine, enriched in adult macaques were significantly and positively correlated with Prevotella and Prevotellamassilia. Furthermore, genus-level pangenome analysis suggested that those six common indicator bacteria can synthesize leucine and arginine as hydroxyproline and proline precursors in both humans and macaques. IMPORTANCE This study provides the first comprehensive investigation of age patterning of gut microbiota of four human populations and three nonhuman primates and found that Prevotellamassilia, Prevotella, Gemmiger, Coprococcus, Faecalibacterium, and Roseburia may be common antiaging microbial markers in both humans and nonhuman primates due to their potential metabolic capabilities for host health benefits. 
Our results also provide key support for using macaques as animal models in studies of the gut microbiome's role during human aging.}, } @article {pmid35752938, year = {2022}, author = {Liu, C and Wang, Y and Peng, J and Fan, B and Xu, D and Wu, J and Cao, Z and Gao, Y and Wang, X and Li, S and Su, Q and Zhang, Z and Wang, S and Wu, X and Shang, Q and Shi, H and Shen, Y and Wang, B and Tian, J}, title = {High-quality genome assembly and pan-genome studies facilitate genetic discovery in mung bean and its improvement.}, journal = {Plant communications}, volume = {3}, number = {6}, pages = {100352}, pmid = {35752938}, issn = {2590-3462}, mesh = {*Vigna/genetics ; Genome-Wide Association Study ; Plant Breeding ; *Fabaceae/genetics ; Polymorphism, Single Nucleotide ; }, abstract = {Mung bean is an economically important legume crop species that is used as a food, consumed as a vegetable, and used as an ingredient and even as a medicine. To explore the genomic diversity of mung bean, we assembled a high-quality reference genome (Vrad_JL7) that was ∼479.35 Mb in size, with a contig N50 length of 10.34 Mb. A total of 40,125 protein-coding genes were annotated, representing ∼96.9% of the genetic region. We also sequenced 217 accessions, mainly landraces and cultivars from China, and identified 2,229,343 high-quality single-nucleotide polymorphisms (SNPs). Population structure revealed that the Chinese accessions diverged into two groups and were distinct from non-Chinese lines. Genetic diversity analysis based on genomic data from 750 accessions in 23 countries supported the hypothesis that mung bean was first domesticated in south Asia and introduced to east Asia probably through the Silk Road. We constructed the first pan-genome of mung bean germplasm and assembled 287.73 Mb of non-reference sequences. Among the genes, 83.1% were core genes and 16.9% were variable. 
Presence/absence variation (PAV) events of nine genes involved in the regulation of the photoperiodic flowering pathway were identified as being under selection during the adaptation process to promote early flowering in the spring. Genome-wide association studies (GWASs) revealed 2,912 SNPs and 259 gene PAV events associated with 33 agronomic traits, including a SNP in the coding region of the SWEET10 homolog (jg24043) involved in crude starch content and a PAV event in a large fragment containing 11 genes for color-related traits. This high-quality reference genome and pan-genome will provide insights into mung bean breeding.}, } @article {pmid35752768, year = {2022}, author = {Guo, G and Wang, Z and Li, Q and Yu, Y and Li, Y and Tan, Z and Zhang, W}, title = {Genomic characterization of Streptococcus parasuis, a close relative of Streptococcus suis and also a potential opportunistic zoonotic pathogen.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {469}, pmid = {35752768}, issn = {1471-2164}, support = {KYCX21_0643//Postgraduate Research & Practice Innovation Program of Jiangsu Province/ ; ZD2021037//Key scientific research project of Jiangsu Commission of Health/ ; 31772751//National Natural Science Foundation of China/ ; NAUSY-MS12//Guidance Foundation, the Sanya Institute of Nanjing Agricultural University/ ; }, mesh = {Animals ; Cattle ; Genomics ; *Streptococcal Infections/veterinary ; Streptococcus ; *Streptococcus suis/genetics ; Swine ; *Swine Diseases ; Virulence/genetics ; }, abstract = {Streptococcus parasuis (S. parasuis) is a close relative of Streptococcus suis (S. suis), composed of former members of S. suis serotypes 20, 22 and 26. S. parasuis could infect pigs and cows, and recently, human infection cases have been reported, making S. parasuis a potential opportunistic zoonotic pathogen. In this study, we analysed the genomic characteristics of S. 
parasuis, using pan-genome analysis, and compared some phenotypic determinants such as capsular polysaccharide, integrative conjugative elements, CRISPR-Cas system and pili, and predicted the potential virulence genes by associated analysis of the clinical condition of isolated source animals and genotypes. Furthermore, to discuss the relationship with S. suis, we compared these characteristics of S. parasuis with those of S. suis. We found that the characteristics of S. parasuis are similar to those of S. suis, both of them have "open" pan-genome, their antimicrobial resistance gene profiles are similar and a srtF pilus cluster of S. suis was identified in S. parasuis genome. But S. parasuis still has its unique characteristics, two novel pilus clusters and three different types of CRISPR-Cas system were found. Therefore, this study provides novel insights into the interspecific and intraspecific genetic characteristics of S. parasuis, which can be useful for further study of this opportunistic pathogen, such as serotyping, diagnostics, vaccine development, and study of the pathogenesis mechanism.}, } @article {pmid35752693, year = {2022}, author = {Huang, G and Zhu, Y}, title = {Insights of section-wide pan-genome into hybrid potato breeding.}, journal = {Science China. 
Life sciences}, volume = {65}, number = {10}, pages = {2125-2127}, pmid = {35752693}, issn = {1869-1889}, mesh = {Genome ; Plant Breeding ; *Solanum tuberosum/genetics ; Tetraploidy ; }, } @article {pmid35751915, year = {2022}, author = {Menghwar, H and Perez-Casal, J}, title = {Comparative genomic analysis of Canadian Mycoplasma bovis strains isolated from Bison and Cattle.}, journal = {Comparative immunology, microbiology and infectious diseases}, volume = {87}, number = {}, pages = {101835}, doi = {10.1016/j.cimid.2022.101835}, pmid = {35751915}, issn = {1878-1667}, mesh = {Animals ; *Bison ; Canada/epidemiology ; Cattle ; Female ; Genomics ; *Mycoplasma Infections/epidemiology/veterinary ; *Mycoplasma bovis/genetics ; Virulence Factors/genetics ; }, abstract = {Mycoplasma bovis (M. bovis) in cattle causes pneumonia, arthritis, otitis media, and mastitis. In addition, multiple outbreaks have been recorded in North American bison. The genomic data on Canadian M. bovis in bison and cattle to date is limited. Whole-genome sequencing (WGS) was used to assess the degree of genome conservation across four Canadian M. bovis strains recovered from bison and cattle. Whole-genome sequences of four M. bovis isolates (Mb1, Mb160, Mb300, Mb304) and the PG45 reference genome were utilized to identify the M. bovis genomic similarity, whole-genome single nucleotide polymorphism (WGS-SNP), virulence determinants, and genomic islands. The pan-genome analysis showed that M. bovis encodes a minimum of 971 genes, while the core genome contained 637 genes. Comparative genomics revealed limited diversity in gene content between bison and cattle isolates. Whole-genome SNP analysis showed that the four M. bovis isolates differed from each other and to PG45. A total of 40 putative virulence genes associated with adhesion, colonization, and destruction of tissues were found in the bison and cattle isolates using the virulence factors database (VFDB). 
These putative virulence factors were equally distributed among isolates. Genomic Islands (GIs) ranging from 4 to 9 and associated with transposases, restriction-modification, ribosomal hypothetical proteins, variable surface lipoproteins, and unknowns were also identified. Overall, the genomic characterization of these isolates may provide new insights into the mechanisms of pathogenicity in M. bovis.}, } @article {pmid35750675, year = {2022}, author = {Kutyna, DR and Onetto, CA and Williams, TC and Goold, HD and Paulsen, IT and Pretorius, IS and Johnson, DL and Borneman, AR}, title = {Construction of a synthetic Saccharomyces cerevisiae pan-genome neo-chromosome.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {3628}, pmid = {35750675}, issn = {2041-1723}, mesh = {Chromosomes, Artificial, Yeast/genetics ; *Genome, Fungal/genetics ; *Saccharomyces cerevisiae/genetics ; Synthetic Biology ; }, abstract = {The Synthetic Yeast Genome Project (Sc2.0) represents the first foray into eukaryotic genome engineering and a framework for designing and building the next generation of industrial microbes. However, the laboratory strain S288c used lacks many of the genes that provide phenotypic diversity to industrial and environmental isolates. To address this shortcoming, we have designed and constructed a neo-chromosome that contains many of these diverse pan-genomic elements and which is compatible with the Sc2.0 design and test framework. The presence of this neo-chromosome provides phenotypic plasticity to the Sc2.0 parent strain, including expanding the range of utilizable carbon sources. We also demonstrate that the induction of programmable structural variation (SCRaMbLE) provides genetic diversity on which further adaptive gains could be selected. 
The presence of this neo-chromosome within the Sc2.0 backbone may therefore provide the means to adapt synthetic strains to a wider variety of environments, a process which will be vital to transitioning Sc2.0 from the laboratory into industrial applications.}, } @article {pmid35750315, year = {2022}, author = {Li, W and Liu, J and Zhang, H and Liu, Z and Wang, Y and Xing, L and He, Q and Du, H}, title = {Plant pan-genomics: recent advances, new challenges, and roads ahead.}, journal = {Journal of genetics and genomics = Yi chuan xue bao}, volume = {49}, number = {9}, pages = {833-846}, doi = {10.1016/j.jgg.2022.06.004}, pmid = {35750315}, issn = {1673-8527}, mesh = {Domestication ; *Genome, Plant/genetics ; *Genomics/methods ; }, abstract = {Pan-genomics can encompass most of the genetic diversity of a species or population and has proved to be a powerful tool for studying genomic evolution and the origin and domestication of species, and for providing information for plant improvement. Plant genomics has greatly progressed because of improvements in sequencing technologies and the rapid reduction of sequencing costs. Nevertheless, pan-genomics still presents many challenges, including computationally intensive assembly methods, high costs with large numbers of samples, ineffective integration of big data, and difficulty in applying it to downstream multi-omics analysis and breeding research. In this review, we summarize the definition and recent achievements of plant pan-genomics, computational technologies used for pan-genome construction, and the applications of pan-genomes in plant genomics and molecular breeding. We also discuss challenges and perspectives for future pan-genomics studies and provide a detailed pipeline for sample selection, genome assembly and annotation, structural variation identification, and construction and application of graph-based pan-genomes. 
The aim is to provide important guidance for plant pan-genome research and a better understanding of the genetic basis of genome evolution, crop domestication, and phenotypic diversity for future studies.}, } @article {pmid35748708, year = {2022}, author = {Bradbury, PJ and Casstevens, T and Jensen, SE and Johnson, LC and Miller, ZR and Monier, B and Romay, MC and Song, B and Buckler, ES}, title = {The Practical Haplotype Graph, a platform for storing and using pangenomes for imputation.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {15}, pages = {3698-3702}, pmid = {35748708}, issn = {1367-4811}, support = {IOS-1238014//U. S. Department of Agriculture-Agricultural Research Service, National Science Foundation Research-PGR/ ; OPP1159867//Bill and Melinda Gates Foundation/ ; }, mesh = {Haplotypes ; *Plant Breeding ; *Genome ; Genomics/methods ; Software ; }, abstract = {MOTIVATION: Pangenomes provide novel insights for population and quantitative genetics, genomics and breeding not available from studying a single reference genome. Instead, a species is better represented by a pangenome or collection of genomes. Unfortunately, managing and using pangenomes for genomically diverse species is computationally and practically challenging. We developed a trellis graph representation anchored to the reference genome that represents most pangenomes well and can be used to impute complete genomes from low density sequence or variant data.

RESULTS: The Practical Haplotype Graph (PHG) is a pangenome pipeline, database (PostGRES & SQLite), data model (Java, Kotlin or R) and Breeding API (BrAPI) web service. The PHG has already been able to accurately represent diversity in four major crops including maize, one of the most genomically diverse species, with up to 1000-fold data compression. Using simulated data, we show that, at even 0.1× coverage, with appropriate reads and sequence alignment, imputation results in extremely accurate haplotype reconstruction. The PHG is a platform and environment for the understanding and application of genomic diversity.

All resources listed here are freely available. The PHG Docker used to generate the simulation results is https://hub.docker.com/ as maizegenetics/phg:0.0.27. PHG source code is at https://bitbucket.org/bucklerlab/practicalhaplotypegraph/src/master/. The code used for the analysis of simulated data is at https://bitbucket.org/bucklerlab/phg-manuscript/src/master/. The PHG database of NAM parent haplotypes is in the CyVerse data store (https://de.cyverse.org/de/) and named /iplant/home/shared/panzea/panGenome/PHG_db_maize/phg_v5Assemblies_20200608.db.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35746733, year = {2022}, author = {Chandrasekar, SS and Phanse, Y and Riel, M and Hildebrand, RE and Hanafy, M and Osorio, JE and Abdelgayed, SS and Talaat, AM}, title = {Systemic Neutralizing Antibodies and Local Immune Responses Are Critical for the Control of SARS-CoV-2.}, journal = {Viruses}, volume = {14}, number = {6}, pages = {}, pmid = {35746733}, issn = {1999-4915}, mesh = {Animals ; *Antibodies, Neutralizing ; Antibodies, Viral ; Antibody Formation ; *COVID-19/prevention & control ; COVID-19 Vaccines ; Humans ; Mice ; Mice, Inbred BALB C ; SARS-CoV-2 ; Spike Glycoprotein, Coronavirus ; }, abstract = {Antibody measurements are primarily used to evaluate experimental and approved COVID-19 vaccines, which is unilateral considering our immune responses' complex nature. Previously, we showed that nanoparticle plasmid DNA adjuvant system, QAC, and MVA based vaccines were immunogenic against SARS-CoV-2. Here, we report on the protective efficacy of systemic humoral and mucosal cell-mediated immune responses in transgenic mice models against SARS-CoV-2 following nanoparticle immunization. Parenteral, intramuscular administration of QAC-based plasmid DNA vaccine-encoding SARS-CoV-2 S and N led to the induction of significant serum neutralizing humoral responses, which reduced viral burden in the lungs and prevented viral dissemination to the brain. In contrast, the mucosal, intranasal administration of a heterologous vaccine elicited significant mucosal cell-mediated immune responses in the lungs that limited lung viral replication. 
The presented results demonstrate that serum neutralizing humoral and local lung T-cell immune responses are critical for the control of SARS-CoV-2 replication.}, } @article {pmid35746494, year = {2022}, author = {Al-Megrin, WAI and Karkashan, A and Alnuqaydan, AM and Aba Alkhayl, FF and Alrumaihi, F and Almatroudi, A and Allemailem, KS}, title = {Design of a Multi-Epitopes Based Chimeric Vaccine against Enterobacter cloacae Using Pan-Genome and Reverse Vaccinology Approaches.}, journal = {Vaccines}, volume = {10}, number = {6}, pages = {}, pmid = {35746494}, issn = {2076-393X}, abstract = {Enterobacter cloacae (EC) is a significant emerging pathogen that is occasionally associated with lung infection, surgical site infection, urinary infection, sepsis, and outbreaks in neonatal intensive care units. In light of the fact that there is currently no approved vaccine or therapeutic option for the treatment of EC, the current study was developed to concentrate on applications based on modern computational approaches to design a multi-epitope-based E. cloacae peptide vaccine (MEBEPV) expressing the antigenic determinants prioritized from the EC genome. Integrated computational analyses identified two potential protein targets (phosphoporin protein-PhoE and putative outer-membrane porin protein) for further exploration on the basis of pangenome subtractive proteomics and immunoinformatic in-depth examination of the core proteomes. Then, a multi-epitope peptide vaccine was designed, which comprised shortlisted epitopes that were capable of eliciting both innate and adaptive immunity, as well as the cholera toxin's B-subunit, which was used as an adjuvant in the vaccine formulation. To ensure maximum expression, the vaccine's 3D structure was developed and the loop was refined, improving the stability by disulfide engineering, and the physicochemical characteristics of the recombinant vaccine sequence were found to be ideal for both in vitro and in vivo experimentation. 
Blind docking was then used for the prediction of the MEBEPV predominant binding mode with MHCI, MHCII, and TLR3 innate immune receptors, with lowest global energy of -18.64 kJ/mol, -48.25 kJ/mol, and -5.20 kJ/mol for MHC-I, MHC-II, and TLR-4, respectively, with docked complexes considered for simulation. In MD and MMGBSA investigations, the docked models of MEBEPV-TLR3, MEBEPV-MHCI, and MEBEPV-MHCII were found to be stable during the course of the simulation. MM-GBSA analysis calculated -122.17 total net binding free energies for the TLR3-vaccine complex, -125.4 for the MHC I-vaccine complex, and -187.94 for the MHC II-vaccine complex. Next, MM-PBSA analysis calculated -115.63 binding free energy for the TLR3-vaccine complex, -118.19 for the MHC I-vaccine complex, and -184.61 for the MHC II-vaccine complex. When the vaccine was tested in silico, researchers discovered that it was capable of inducing both types of immune responses (cell mediated and humoral) at the same time. Even though the suggested MEBEPV has the potential to be a powerful contender against E. cloacae-associated illnesses, further testing in the laboratory will be required before it can be declared safe and immunogenic.}, } @article {pmid35745530, year = {2022}, author = {Jungkhun, N and Gomes de Farias, AR and Watcharachaiyakup, J and Kositcharoenkul, N and Ham, JH and Patarapuwadol, S}, title = {Phylogenetic Characterization and Genome Sequence Analysis of Burkholderia glumae Strains Isolated in Thailand as the Causal Agent of Rice Bacterial Panicle Blight.}, journal = {Pathogens (Basel, Switzerland)}, volume = {11}, number = {6}, pages = {}, pmid = {35745530}, issn = {2076-0817}, support = {1015305//National Institute of Food and Agriculture (NIFA) - USDA/ ; }, abstract = {Burkholderia glumae is one of the most critical rice-pathogenic bacteria, and it causes bacterial panicle blight (BPB) in rice plants. In 2017, BPB symptoms were observed from rice fields in Chiang Rai, Northern Thailand. 
Sixty-one isolates obtained from the symptomatic panicles of rice were initially identified as B. glumae by polymerase chain reaction (PCR) using species-specific primers. Among them, six selected strains isolated from the susceptible japonica rice cultivar DOA2 were characterized in terms of morpho-physiology, pathology, phylogenetics, and genomics. Our genome sequence analysis of the six selected strains revealed the presence of multiple prophages, which may reflect the high level of diversity in this bacterial species through dynamic horizontal gene transfer processes, including phage infection. This notion was supported by the results of phylogenetic and phylogenomic analyses, which showed the formation of several subgroups not related to the years of isolation or the geographical origins. This study reports the isolation of B. glumae as the causal pathogen of BPB disease in japonica rice in Thailand and provides genomic resources to better understand the biology and diversity of this plant pathogenic bacterium. Further studies with a vast collection of B. glumae strains from various rice-growing regions around the world are needed to elucidate the evolution, variability, and lifestyle of the pathogen.}, } @article {pmid35739387, year = {2022}, author = {Edwards, D and Batley, J}, title = {Graph pangenomes find missing heritability.}, journal = {Nature genetics}, volume = {54}, number = {7}, pages = {919-920}, pmid = {35739387}, issn = {1546-1718}, mesh = {*Genome-Wide Association Study ; *Models, Genetic ; }, } @article {pmid35736064, year = {2022}, author = {Guo, Y and Liu, Z and Fu, Y and Li, Y and Dai, Y and Xiao, S}, title = {Pan-Genomes Provide Insights into the Genetic Basis of Auricularia heimuer Domestication.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {6}, pages = {}, pmid = {35736064}, issn = {2309-608X}, support = {grant number 2021YFD1600401//the Key Project on R&D of the Ministry of Science and Technology/ ; No. 
D17014//the Program of Creation and Utilization of Germplasm of Mushroom Crop of "111" Project/ ; No. 2017B01011//the National-level International Joint Research Centre/ ; }, abstract = {In order to reveal the genetic variation signals of Auricularia heimuer that have occurred during their domestication and to find potential functional gene families, we constructed a monokaryotic pan-genome of A. heimuer representing four cultivated strains and four wild strains. The pan-genome contained 14,089 gene families, of which 67.56% were core gene families and 31.88% were dispensable gene families. We screened substrate utilization-related genes such as the chitinase gene ahchi1 of the glycoside hydrolase (GH) 18 family and a carbohydrate-binding module (CBM)-related gene from the dispensable families of cultivated populations. The genomic difference in the ahchi1 gene between the wild and cultivated genomes was caused by a 33 kb presence/absence variation (PAV). The detection rate of the ahchi1 gene was 93.75% in the cultivated population, significantly higher than that in the wild population (17.39%), indicating that it has been selected in cultivated strains. Principal component analysis (PCA) of the polymorphic markers in fragments near the ahchi1 gene was enriched in cultivated strains, and this was caused by multiple independent instances of artificial selection. We revealed for the first time the genetic basis of the ahchi1 gene in domestication, thereby providing a foundation for elucidating the potential function of the ahchi1 gene in the breeding of A. heimuer.}, } @article {pmid35733954, year = {2022}, author = {Cooper, ZS and Rapp, JZ and Shoemaker, AMD and Anderson, RE and Zhong, ZP and Deming, JW}, title = {Evolutionary Divergence of Marinobacter Strains in Cryopeg Brines as Revealed by Pangenomics.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {879116}, pmid = {35733954}, issn = {1664-302X}, abstract = {Marinobacter spp. 
are cosmopolitan in saline environments, displaying a diverse set of metabolisms that allow them to competitively occupy these environments, some of which can be extreme in both salinity and temperature. Here, we introduce a distinct cluster of Marinobacter genomes, composed of novel isolates and in silico assembled genomes obtained from subzero, hypersaline cryopeg brines, relic seawater-derived liquid habitats within permafrost sampled near Utqiaġvik, Alaska. Using these new genomes and 45 representative publicly available genomes of Marinobacter spp. from other settings, we assembled a pangenome to examine how the new extremophile members fit evolutionarily and ecologically, based on genetic potential and environmental source. This first genus-wide genomic analysis revealed that Marinobacter spp. in general encode metabolic pathways that are thermodynamically favored at low temperature, cover a broad range of organic compounds, and optimize protein usage, e.g., the Entner-Doudoroff pathway, the glyoxylate shunt, and amino acid metabolism. The new isolates contributed to a distinct clade of subzero brine-dwelling Marinobacter spp. that diverged genotypically and phylogenetically from all other Marinobacter members. The subzero brine clade displays genomic characteristics that may explain competitive adaptations to the extreme environments they inhabit, including more abundant membrane transport systems (e.g., for organic substrates, compatible solutes, and ions) and stress-induced transcriptional regulatory mechanisms (e.g., for cold and salt stress) than in the other Marinobacter clades. We also identified more abundant signatures of potential horizontal transfer of genes involved in transcription, the mobilome, and a variety of metabolite exchange systems, which led to considering the importance of this evolutionary mechanism in an extreme environment where adaptation via vertical evolution is physiologically rate limited. 
Assessing these new extremophile genomes in a pangenomic context has provided a unique view into the ecological and evolutionary history of the genus Marinobacter, particularly with regard to its remarkable diversity and its opportunism in extremely cold and saline environments.}, } @article {pmid35733110, year = {2022}, author = {Kuzmanović, N and Biondi, E and Overmann, J and Puławska, J and Verbarg, S and Smalla, K and Lassalle, F}, title = {Genomic analysis provides novel insights into diversification and taxonomy of Allorhizobium vitis (i.e. Agrobacterium vitis).}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {462}, pmid = {35733110}, issn = {1471-2164}, mesh = {Agrobacterium/genetics ; Genomics ; Phylogeny ; Plant Tumors ; *Rhizobiaceae/genetics ; *Vitis/genetics/microbiology ; }, abstract = {BACKGROUND: Allorhizobium vitis (formerly named Agrobacterium vitis or Agrobacterium biovar 3) is the primary causative agent of crown gall disease of grapevine worldwide. We obtained and analyzed whole-genome sequences of diverse All. vitis strains to get insights into their diversification and taxonomy.

RESULTS: Pairwise genome comparisons and phylogenomic analysis of various All. vitis strains clearly indicated that All. vitis is not a single species, but represents a species complex composed of several genomic species. Thus, we emended the description of All. vitis, which now refers to a restricted group of strains within the All. vitis species complex (i.e. All. vitis sensu stricto) and proposed a description of a novel species, All. ampelinum sp. nov. The type strain of All. vitis sensu stricto remains the current type strain of All. vitis, K309[T]. The type strain of All. ampelinum sp. nov. is S4[T]. We also identified sets of gene clusters specific to the All. vitis species complex, All. vitis sensu stricto and All. ampelinum, respectively, for which we predicted the biological function and infer the role in ecological diversification of these clades, including some we could experimentally validate. All. vitis species complex-specific genes confer tolerance to different stresses, including exposure to aromatic compounds. Similarly, All. vitis sensu stricto-specific genes confer the ability to degrade 4-hydroxyphenylacetate and a putative compound related to gentisic acid. All. ampelinum-specific genes have putative functions related to polyamine metabolism and nickel assimilation. Congruently with the genome-based classification, All. vitis sensu stricto and All. ampelinum were clearly delineated by MALDI-TOF MS analysis. Moreover, our genome-based analysis indicated that Allorhizobium is clearly separated from other genera of the family Rhizobiaceae.

CONCLUSIONS: Comparative genomics and phylogenomic analysis provided novel insights into the diversification and taxonomy of Allorhizobium vitis species complex, supporting our redefinition of All. vitis sensu stricto and description of All. ampelinum. Our pan-genome analyses suggest that these species have differentiated ecologies, each relying on specialized nutrient consumption or toxic compound degradation to adapt to their respective niche.}, } @article {pmid35731940, year = {2022}, author = {Romero Picazo, D and Werner, A and Dagan, T and Kupczok, A}, title = {Pangenome Evolution in Environmentally Transmitted Symbionts of Deep-Sea Mussels Is Governed by Vertical Inheritance.}, journal = {Genome biology and evolution}, volume = {14}, number = {7}, pages = {}, pmid = {35731940}, issn = {1759-6653}, mesh = {Animals ; Bacteria/genetics ; Gene Transfer, Horizontal ; Genome, Bacterial ; Methane ; *Mytilidae/genetics/microbiology ; Phylogeny ; Sulfur ; Symbiosis/genetics ; }, abstract = {Microbial pangenomes vary across species; their size and structure are determined by genetic diversity within the population and by gene loss and horizontal gene transfer (HGT). Many bacteria are associated with eukaryotic hosts where the host colonization dynamics may impact bacterial genome evolution. Host-associated lifestyle has been recognized as a barrier to HGT in parentally transmitted bacteria. However, pangenome evolution of environmentally acquired symbionts remains understudied, often due to limitations in symbiont cultivation. Using high-resolution metagenomics, here we study pangenome evolution of two co-occurring endosymbionts inhabiting Bathymodiolus brooksi mussels from a single cold seep. The symbionts, sulfur-oxidizing (SOX) and methane-oxidizing (MOX) gamma-proteobacteria, are environmentally acquired at an early developmental stage and individual mussels may harbor multiple strains of each symbiont species. 
We found differences in the accessory gene content of both symbionts across individual mussels, which are reflected by differences in symbiont strain composition. Compared with core genes, accessory genes are enriched in genome plasticity functions. We found no evidence for recent HGT between both symbionts. A comparison between the symbiont pangenomes revealed that the MOX population is less diverged and contains fewer accessory genes, supporting that the MOX association with B. brooksi is more recent in comparison to that of SOX. Our results show that the pangenomes of both symbionts evolved mainly by vertical inheritance. We conclude that genome evolution of environmentally transmitted symbionts that associate with individual hosts over their lifetime is affected by a narrow symbiosis where the frequency of HGT is constrained.}, } @article {pmid35731562, year = {2022}, author = {Moran, RA and Liu, H and Doughty, EL and Hua, X and Cummins, EA and Liveikis, T and McNally, A and Zhou, Z and van Schaik, W and Yu, Y}, title = {GR13-type plasmids in Acinetobacter potentiate the accumulation and horizontal transfer of diverse accessory genes.}, journal = {Microbial genomics}, volume = {8}, number = {6}, pages = {}, pmid = {35731562}, issn = {2057-5858}, support = {MR/S013660/1/MRC_/Medical Research Council/United Kingdom ; }, mesh = {*Acinetobacter baumannii/genetics ; Plasmids/genetics ; }, abstract = {Carbapenem and other antibiotic resistance genes (ARGs) can be found in plasmids in Acinetobacter, but many plasmid types in this genus have not been well-characterized. Here we describe the distribution, diversity and evolutionary capacity of rep group 13 (GR13) plasmids that are found in Acinetobacter species from diverse environments. Our investigation was prompted by the discovery of two GR13 plasmids in A. baumannii isolated in an intensive care unit (ICU). 
The plasmids harbour distinct accessory genes: pDETAB5 contains bla NDM-1 and genes that confer resistance to four further antibiotic classes, while pDETAB13 carries putative alcohol tolerance determinants. Both plasmids contain multiple dif modules, which are flanked by pdif sites recognized by XerC/XerD tyrosine recombinases. The ARG-containing dif modules in pDETAB5 are almost identical to those found in pDETAB2, a GR34 plasmid from an unrelated A. baumannii isolated in the same ICU a month prior. Examination of a further 41 complete, publicly available plasmid sequences revealed that the GR13 pangenome consists of just four core but 1186 accessory genes, 123 in the shell and 1063 in the cloud, reflecting substantial capacity for diversification. The GR13 core genome includes genes for replication and partitioning, and for a putative tyrosine recombinase. Accessory segments encode proteins with diverse putative functions, including for metabolism, antibiotic/heavy metal/alcohol tolerance, restriction-modification, an anti-phage system and multiple toxin–antitoxin systems. The movement of dif modules and actions of insertion sequences play an important role in generating diversity in GR13 plasmids. 
Discrete GR13 plasmid lineages are internationally disseminated and found in multiple Acinetobacter species, which suggests they are important platforms for the accumulation, horizontal transmission and persistence of accessory genes in this genus.}, } @article {pmid35731345, year = {2022}, author = {Da Silva, WM and Larzabal, M and Aburjaile, FF and Riviere, N and Martorelli, L and Bono, J and Amadio, A and Cataldi, A}, title = {Whole-genome sequencing analysis of Shiga toxin-producing Escherichia coli O22:H8 isolated from cattle prediction pathogenesis and colonization factors and position in STEC universe phylogeny.}, journal = {Journal of microbiology (Seoul, Korea)}, volume = {60}, number = {7}, pages = {689-704}, pmid = {35731345}, issn = {1976-3794}, mesh = {Animals ; Cattle ; *Escherichia coli Infections/microbiology/veterinary ; *Escherichia coli Proteins/genetics ; Phylogeny ; Shiga Toxin/genetics ; *Shiga-Toxigenic Escherichia coli/genetics ; Virulence Factors/genetics/metabolism ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) is a foodborne pathogen capable of causing illness in humans. In a previous study, our group showed that a STEC isolate belonging to O22:H8 serotype (strain 154) can interfere with STEC O157:H7 colonization both in vitro and in vivo. Using whole-genome sequencing and genomic comparative, we predicted a subset of genes acquired by O22:H8 strain 154 through horizontal gene transfer that might be responsible for the phenotype previously described by our group. Among them were identified genes related to the pathogenesis of non-LEE (locus of enterocyte effacement) STEC, specific metabolic processes, antibiotic resistance and genes encoding for the T6SS-1 that is related to inter-bacterial competition. In addition, we showed that this strain carries stx1c and stx2dact, a mucus-inducible variant. 
The results obtained in this study provide insights into STEC genomic plasticity and the importance of genomic islands in the adaptation and pathogenesis of this pathogen.}, } @article {pmid35731037, year = {2022}, author = {Wu, L and Xie, J and Qi, Y and Su, T and Jiang, L and Zhou, W and Jiang, Y and Zhang, C and Zhong, X and Cao, Y and Wang, W}, title = {Mutational landscape of non-functional adrenocortical adenomas.}, journal = {Endocrine-related cancer}, volume = {29}, number = {9}, pages = {521-532}, doi = {10.1530/ERC-21-0410}, pmid = {35731037}, issn = {1479-6821}, mesh = {*Adrenal Cortex Neoplasms/pathology ; *Adrenal Gland Neoplasms ; *Adrenocortical Adenoma/genetics/metabolism ; Carcinogenesis ; G Protein-Coupled Inwardly-Rectifying Potassium Channels/genetics ; Humans ; Mutation ; beta Catenin/genetics/metabolism ; }, abstract = {Adrenal incidentalomas are the most frequent human neoplasms. Recent genomic investigations on functional adrenocortical tumors have demonstrated that somatic mutations in PRKACA and KCNJ5 responsible for the development of adrenocortical adenomas (ACAs) are associated with hypercortisolism and aldosteronism, respectively. Several studies have identified CTNNB1 mutations in ACAs and have been mostly involved in the tumorigenesis of non-functional ACA (NFACA). However, integrated genomic characterization of NFACAs is lacking. In the current study, we utilized pan-genomic methods to comprehensively analyze 60 NFACA samples. A total of 1264 somatic mutations in coding regions among the 60 samples were identified, with a median of 15 non-silent mutations per tumor. Twenty-two NFACAs (36.67%) had genetic alterations in CTNNB1. We also identified several somatic mutations in genes of the cAMP/PKA pathway and KCNJ5. Histone modification genes (KMT2A, KMT2C, and KMT2D) were altered in 10% of cases. Germline mutations of MEN1 and RET were also found. 
Finally, by comparison of our transcriptome data with those available in the TCGA, we illustrated the molecular characterization of NFACA. We revealed the genetic profiling and molecular landscape of NFACA. Wnt/β-catenin pathway activation as shown by nuclear and/or cytoplasmic β-catenin accumulation is frequent, occurring in about one-third of ACA cases. Cytochrome P450 enzymes could be markers to reveal the functional status of adrenocortical tumors. These observations strongly suggest the involvement of the Wnt/β-catenin pathway in benign adrenal tumorigenesis and possibly in the regulation of steroid secretion.}, } @article {pmid35730965, year = {2022}, author = {Bai, X and Ylinen, E and Zhang, J and Salmenlinna, S and Halkilahti, J and Saxen, H and Narayanan, A and Jahnukainen, T and Matussek, A}, title = {Comparative Genomics of Shiga Toxin-Producing Escherichia coli Strains Isolated from Pediatric Patients with and without Hemolytic Uremic Syndrome from 2000 to 2016 in Finland.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0066022}, pmid = {35730965}, issn = {2165-0497}, mesh = {Child ; *Escherichia coli Infections/epidemiology/microbiology ; Finland/epidemiology ; Genomics ; *Hemolytic-Uremic Syndrome/epidemiology/microbiology ; Humans ; Shiga Toxin ; *Shiga-Toxigenic Escherichia coli/genetics ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) infection can cause mild to severe illness, such as nonbloody or bloody diarrhea, and the fatal hemolytic uremic syndrome (HUS). The molecular mechanism underlying the variable pathogenicity of STEC infection is not fully defined so far. Here, we performed a comparative genomics study on a large collection of clinical STEC strains collected from STEC-infected pediatric patients with and without HUS in Finland over a 16-year period, aiming to identify the bacterial genetic factors that can predict the risk to cause HUS and poor renal outcome. 
Of 240 STEC strains included in this study, 52 (21.7%) were from pediatric patients with HUS. Serotype O157:H7 was the main cause of HUS, and Shiga toxin gene subtype stx2a was significantly associated with HUS. Comparative genomics and pangenome-wide association studies identified a number of virulence and accessory genes overrepresented in HUS-associated STEC compared to non-HUS STEC strains, including genes encoding cytolethal distending toxins, type III secretion system effectors, adherence factors, etc. No virulence or accessory gene was significantly associated with risk factors for poor renal outcome among HUS patients assessed in this study, including need for and duration of dialysis, presence and duration of anuria, and leukocyte counts. Whole-genome phylogeny and multiple-correspondence analysis of pangenomes could not separate HUS STEC from non-HUS STEC strains, suggesting that STEC strains with diverse genetic backgrounds may independently acquire genetic elements that determine their varied pathogenicity. Our findings indicate that nonbacterial factors, i.e., characteristics of the host immunity, might affect STEC virulence and clinical outcomes. IMPORTANCE Shiga toxin-producing Escherichia coli (STEC) is a serious public health burden worldwide which causes outbreaks of gastrointestinal diseases and the fatal hemolytic uremic syndrome (HUS) characterized by the triad of mechanical hemolytic anemia, thrombocytopenia, and acute renal failure. Understanding the mechanism underlying the disease severity and patient outcome is of high importance. Using comparative genomics on a large collection of clinical STEC strains from STEC-infected patients with and without HUS, our study provides a reference of STEC genetic factors/variants that can be used as predictors of the development of HUS, which will aid risk assessment at the early stage of STEC infection. 
Additionally, our findings suggest that nonbacterial factors may play a primary role in the renal outcome in STEC-infected patients with HUS; further studies are needed to validate this.}, } @article {pmid35729190, year = {2022}, author = {Rocha, J and Henriques, I and Gomila, M and Manaia, CM}, title = {Common and distinctive genomic features of Klebsiella pneumoniae thriving in the natural environment or in clinical settings.}, journal = {Scientific reports}, volume = {12}, number = {1}, pages = {10441}, pmid = {35729190}, issn = {2045-2322}, mesh = {Anti-Bacterial Agents ; Genomics ; Humans ; *Klebsiella Infections/microbiology ; *Klebsiella pneumoniae ; Microbial Sensitivity Tests ; Multilocus Sequence Typing ; Phylogeny ; beta-Lactamases/genetics ; }, abstract = {The Klebsiella pneumoniae complex is comprised of ubiquitous bacteria that can be found in soils, plants or water, and as humans' opportunistic pathogens. This study aimed at inferring common and distinctive features in clinical and environmental K. pneumoniae. Whole genome sequences of members of the K. pneumoniae complex (including K. variicola, n = 6; and K. quasipneumoniae, n = 7), of clinical (n = 78) and environmental (n = 61) origin from 21 countries were accessed from the GenBank. These genomes were compared based on phylogeny, pangenome and selected clinically relevant traits. Phylogenetic analysis based on 2704 genes of the core genome showed close relatedness between clinical and environmental strains, in agreement with the multi-locus sequence typing. Eight out of the 62 sequence types (STs) identified, included both clinical and environmental genomes (ST11, ST14, ST15, ST37, ST45, ST147, ST348, ST437). Pangenome-wide association studies did not evidence significant differences between clinical and environmental genomes. 
However, the genomes of clinical isolates presented significantly more exclusive genes related to antibiotic resistance/plasmids, while the environmental isolates yielded significantly higher allelic diversity of genes related with functions such as efflux or oxidative stress. The study suggests that K. pneumoniae can circulate among the natural environment and clinical settings, probably under distinct adaptation pressures.}, } @article {pmid35727540, year = {2022}, author = {Sollitto, M and Kenny, NJ and Greco, S and Tucci, CF and Calcino, AD and Gerdol, M}, title = {Detecting Structural Variants and Associated Gene Presence-Absence Variation Phenomena in the Genomes of Marine Organisms.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2498}, number = {}, pages = {53-76}, pmid = {35727540}, issn = {1940-6029}, mesh = {*Aquatic Organisms/genetics ; Genetic Variation ; Genome ; *Genomic Structural Variation ; Genomics/methods ; Sequence Analysis, DNA ; }, abstract = {As complete genomes become easier to attain, even from previously difficult-to-sequence species, and as genomic resequencing becomes more routine, it is becoming obvious that genomic structural variation is more widespread than originally thought and plays an important role in maintaining genetic variation in populations. Structural variants (SVs) and associated gene presence-absence variation (PAV) can be important players in local adaptation, allowing the maintenance of genetic variation and taking part in other evolutionarily relevant phenomena. While recent studies have highlighted the importance of structural variation in Mollusca, the prevalence of this phenomenon in the broader context of marine organisms remains to be fully investigated. Here, we describe a straightforward and broadly applicable method for the identification of SVs in fully assembled diploid genomes, leveraging the same reads used for assembly. 
We also explain a gene PAV analysis protocol, which could be broadly applied to any species with a fully sequenced reference genome available. Although the strengths of these approaches have been tested and proven in marine invertebrates, which tend to have high levels of heterozygosity, possibly due to their lifestyle traits, they are also applicable to other species across the tree of life, providing a ready means to begin investigations into this potentially widespread phenomenon.}, } @article {pmid35727397, year = {2022}, author = {Kumar, S and Bansal, K and Sethi, SK}, title = {Reclassification of Streptococcus ilei as a later heterotypic synonym of Streptococcus koreensis based on whole-genome sequence analysis.}, journal = {Archives of microbiology}, volume = {204}, number = {7}, pages = {408}, pmid = {35727397}, issn = {1432-072X}, mesh = {Bacterial Typing Techniques ; DNA, Bacterial/genetics ; Humans ; Nucleic Acid Hybridization ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; Sequence Analysis, DNA ; *Streptococcus/genetics ; }, abstract = {The genus Streptococcus, a member of family Streptococcaceae, is known for its wide range of industrial, clinical and human relevance. Among the species of genus Streptococcus two members, namely Streptococcus koreensis and Streptococcus ilei, were isolated from subgingival dental plaque and human small intestinal fluid, respectively. The 16S rRNA gene sequence similarity of the type strains of these members shows a similarity of 99.87%. In this study, we performed a systematic study to clarify the taxonomic assignment of these two species. Genome similarity assessment based on whole-genome sequence information such as average nucleotide identity using orthoANI and fastANI, digital DNA-DNA hybridization value between S. koreensis and S. ilei were 96.31, 96.60, 86.4 and 97.63, respectively. All these genome similarity values clearly exceeded the species delineation cutoffs. 
Phylogenetic assessment using 16S rRNA gene and whole-genome information using PhyloPhlAn, which uses around 400 conserved genes across bacterial phyla, provides additional evidence for these members forming a monophyletic clade in the phylogenetic tree. Pan genome analysis suggests a very large core genome (n = 1374) and the presence of no unique gene between the genomes of S. koreensis and S. ilei. Additionally, we found highly syntenic genomes of type strains of these two species. Based on these evidences, we propose S. ilei should be reclassified as a later heterotypic synonym of S. koreensis.}, } @article {pmid35727037, year = {2022}, author = {Montelongo, C and Mores, CR and Putonti, C and Wolfe, AJ and Abouelfetouh, A}, title = {Whole-Genome Sequencing of Staphylococcus aureus and Staphylococcus haemolyticus Clinical Isolates from Egypt.}, journal = {Microbiology spectrum}, volume = {10}, number = {4}, pages = {e0241321}, pmid = {35727037}, issn = {2165-0497}, support = {R01 DK104718/DK/NIDDK NIH HHS/United States ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Egypt/epidemiology ; Humans ; *Methicillin-Resistant Staphylococcus aureus/genetics ; Microbial Sensitivity Tests ; Multilocus Sequence Typing ; *Staphylococcal Infections/epidemiology ; Staphylococcus/genetics ; Staphylococcus aureus/genetics ; Staphylococcus haemolyticus/genetics ; }, abstract = {Infections caused by antibiotic-resistant Staphylococcus are a global concern. This is true in the Middle East, where increasingly resistant Staphylococcus aureus and Staphylococcus haemolyticus strains have been detected. While extensive surveys have revealed the prevalence of infections caused by antibiotic-resistant staphylococci in Europe, Asia, and North America, the population structure of antibiotic-resistant staphylococci recovered from patients and clinical settings in Egypt remains uncharacterized. We performed whole-genome sequencing of 56 S. aureus and 10 S. 
haemolyticus isolates from Alexandria Main University Hospital; 46 of the S. aureus genomes and all 10 of the S. haemolyticus genomes carry mecA, which confers methicillin resistance. Supplemented with additional publicly available genomes from the other parts of the Middle East (34 S. aureus and 6 S. haemolyticus), we present the largest genomic study to date of staphylococcal isolates from the Middle East. These genomes include 20 S. aureus multilocus sequence types (MLST), including 3 new ones. They also include 9 S. haemolyticus MLSTs, including 1 new one. Phylogenomic analyses of each species' core genome largely mirrored those of the MLSTs, irrespective of geographical origin. The hospital-acquired spa t037/ST239-SCCmec III/MLST CC8 clone represented the largest clade, comprising 22% of the S. aureus isolates. Like S. aureus genome surveys of other regions, these isolates from the Middle East have an open pangenome, a strong indicator of gene exchange of virulence factors and antibiotic resistance genes with other reservoirs. Our genome analyses will inform antibiotic stewardship and infection control plans in the Middle East. IMPORTANCE Staphylococci are understudied despite their prevalence within the Middle East. Methicillin-resistant Staphylococcus aureus (MRSA) is endemic to hospitals in Egypt, as are other antibiotic-resistant strains of S. aureus and S. haemolyticus. To provide insight into the strains circulating in Egypt, we performed whole-genome sequencing of 56 S. aureus and 10 S. haemolyticus isolates from Alexandria Main University Hospital. Through analysis of these genomes, as well as all available S. aureus and S. haemolyticus genomes from the Middle East (n = 40), we were able to produce a picture of the diversity in this region more complete than those afforded by traditional molecular typing strategies. For example, we identified 4 new MLSTs. 
Most strains harbored genes associated with multidrug resistance, toxin production, biofilm formation, and immune evasion. These data provide invaluable insight for future antibiotic stewardship and infection control within the Middle East.}, } @article {pmid35722513, year = {2022}, author = {Parakkunnel, R and Bhojaraja Naik, K and Susmita, C and Girimalla, V and Bhaskar, KU and Sripathy, KV and Shantharaja, CS and Aravindan, S and Kumar, S and Lakhanpaul, S and Bhat, KV}, title = {Evolution and co-evolution: insights into the divergence of plant heat shock factor genes.}, journal = {Physiology and molecular biology of plants : an international journal of functional plant biology}, volume = {28}, number = {5}, pages = {1029-1047}, pmid = {35722513}, issn = {0971-5894}, abstract = {UNLABELLED: The Heat Shock Factor (Hsf) genes are widely distributed across the plant kingdom regulating the plant response to various abiotic stresses. In addition to natural selection, breeding and accelerated selection changed the structure and function of Hsf genes. 1076 Hsf genes from 30 genera from primitive algae to the most advanced plant species and major crop plants were used for phylogenetic analysis. The interspecific divergence was studied with 11 members of genus Oryza while intraspecific divergence was studied with sesame pan-genome adapted to diverse ecological niches. B2 genes in eudicots and monocots originated separately while A1 gave rise to the recently evolved Class-C genes and land colonization happened with evolution of A1 genes. An increase in the number of lineages in the Oryza clade with the evolution of AA genome indicated independent domestication and positive selection was observed in > 53% of loci whereas the highly conserved homologues were under purifying selection. The paralogous genes under positive selection exhibited more domain changes for diversified function and increased fitness. 
A significant co-evolving cluster involving amino acids Phenylalanine, Lysine and Valine played a crucial role in maintaining hydrophobic core along with highly conserved Tryptophan residues. A mutation of Glutamic acid to Glutamine was observed in A8 genes of Lamiales affecting protein solvency. Breeding resulted in accumulation of mutations reducing the hydrophobicity of proteins and a further reduction in protein aggregation. This study identifies genome duplications, non-neutral selection and co-evolving residues as causing drastic changes in the conserved domain of Hsf proteins.

SUPPLEMENTARY INFORMATION: The online version contains supplementary material available at 10.1007/s12298-022-01183-7.}, } @article {pmid35722315, year = {2022}, author = {Podrzaj, L and Burtscher, J and Domig, KJ}, title = {Comparative Genomics Provides Insights Into Genetic Diversity of Clostridium tyrobutyricum and Potential Implications for Late Blowing Defects in Cheese.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {889551}, pmid = {35722315}, issn = {1664-302X}, abstract = {Clostridium tyrobutyricum has been recognized as the main cause of late blowing defects (LBD) in cheese leading to considerable economic losses for the dairy industry. Although differences in spoilage ability among strains of this species have been acknowledged, potential links to the genetic diversity and functional traits remain unknown. In the present study, we aimed to investigate and characterize genomic variation, pan-genomic diversity and key traits of C. tyrobutyricum by comparing the genomes of 28 strains. A comparative genomics analysis revealed an "open" pangenome comprising 9,748 genes and a core genome of 1,179 genes shared by all test strains. Among those core genes, the majority of genes encode proteins related to translation, ribosomal structure and biogenesis, energy production and conversion, and amino acid metabolism. A large part of the accessory genome is composed of sets of unique, strain-specific genes ranging from about 5 to more than 980 genes. Furthermore, functional analysis revealed several strain-specific genes related to replication, recombination and repair, cell wall, membrane and envelope biogenesis, and defense mechanisms that might facilitate survival under stressful environmental conditions. Phylogenomic analysis divided strains into two clades: clade I contained human, mud, and silage isolates, whereas clade II comprised cheese and milk isolates. 
Notably, these two groups of isolates showed differences in certain hypothetical proteins, transcriptional regulators and ABC transporters involved in resistance to oxidative stress. To the best of our knowledge, this is the first study to provide comparative genomics of C. tyrobutyricum strains related to LBD. Importantly, the findings presented in this study highlight the broad genetic diversity of C. tyrobutyricum, which might help us understand the diversity in spoilage potential of C. tyrobutyricum in cheese and provide some clues for further exploring the gene modules responsible for the spoilage ability of this species.}, } @article {pmid35720548, year = {2022}, author = {Wang, Y and Habekuß, A and Jayakodi, M and Mascher, M and Snowdon, RJ and Stahl, A and Fuß, J and Ordon, F and Perovic, D}, title = {High-Resolution Mapping of Barley mild mosaic virus Resistance Gene rym15.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {908170}, pmid = {35720548}, issn = {1664-462X}, abstract = {Barley yellow mosaic virus (BaYMV) and Barley mild mosaic virus (BaMMV), which are transmitted by the soil-borne plasmodiophorid Polymyxa graminis, cause high yield losses in barley. In previous studies, the recessive BaMMV resistance gene rym15, derived from the Japanese landrace Chikurin Ibaraki 1, was mapped on chromosome 6HS of Hordeum vulgare. In this study, 423 F4 segmental recombinant inbred lines (RILs) were developed from crosses of Chikurin Ibaraki 1 with two BaMMV-susceptible cultivars, Igri (139 RILs) and Uschi (284 RILs). A set of 32 competitive allele-specific PCR (KASP) assays, designed using single nucleotide polymorphisms (SNPs) from the barley 50 K Illumina Infinium iSelect SNP chip, genotyping by sequencing (GBS) and whole-genome sequencing (WGS), was used as a backbone for construction of two high-resolution maps. 
Using this approach, the target locus was narrowed down to 0.161 cM and 0.036 cM in the Igri × Chikurin Ibaraki 1 (I × C) and Chikurin Ibaraki 1 × Uschi (C × U) populations, respectively. Corresponding physical intervals of 11.3 Mbp and 0.281 Mbp were calculated for I × C and C × U, respectively, according to the Morex v3 genome sequence. In the 0.281 Mbp target region, six high confidence (HC) and two low confidence (LC) genes were identified. Genome assemblies of BaMMV-susceptible cultivars Igri and Golden Promise from the barley pan-genome, and a HiFi assembly of Chikurin Ibaraki 1 together with re-sequencing data for the six HC and two LC genes in susceptible parental cultivar Uschi revealed functional SNPs between resistant and susceptible genotypes only in two of the HC genes. These SNPs are the most promising candidates for the development of functional markers and the two genes represent promising candidates for functional analysis.}, } @article {pmid35720310, year = {2022}, author = {Tan, YC and Lahiri, C}, title = {Promising Acinetobacter baumannii Vaccine Candidates and Drug Targets in Recent Years.}, journal = {Frontiers in immunology}, volume = {13}, number = {}, pages = {900509}, pmid = {35720310}, issn = {1664-3224}, mesh = {*Acinetobacter baumannii ; Anti-Bacterial Agents/pharmacology ; Bacterial Vaccines ; Computational Biology/methods ; Molecular Docking Simulation ; }, abstract = {In parallel to the uncontrolled use of antibiotics, the emergence of multidrug-resistant bacteria, like Acinetobacter baumannii, has posed a severe threat. A. baumannii predominates in the nosocomial setting due to its ability to persist in hospitals and survive antibiotic treatment, thereby eventually leading to an increasing prevalence and mortality due to its infection. With the increasing spectra of drug resistance and the incessant collapse of newly discovered antibiotics, new therapeutic countermeasures have been in high demand. 
Hence, recent research has shown favouritism towards the long-term solution of designing vaccines. Therefore, being a realistic alternative strategy to combat this pathogen, anti-A. baumannii vaccine research has continued unearthing various antigens with variable results over the last decade. Again, other approaches, including pan-genomics, subtractive proteomics, and reverse vaccination strategies, have shown promise for identifying promiscuous core vaccine candidates that resulted in chimeric vaccine constructs. In addition, the integration of basic knowledge of the pathobiology of this drug-resistant bacterium has also facilitated the development of effective multiantigen vaccines. As opposed to the conventional trial-and-error approach, incorporating the in silico methods in recent studies, particularly network analysis, has shown great promise in unearthing novel vaccine candidates from the A. baumannii proteome. Some studies have used multiple A. baumannii data sources to build the co-functional networks and analyze them by k-shell decomposition. Additionally, Whole Genomic Protein Interactome (GPIN) analysis has utilized a rational approach for identifying essential proteins and presenting them as vaccines effective enough to combat the deadly pathogenic threats posed by A. baumannii. Others have identified multiple immune nodes using network-based centrality measurements for synergistic antigen combinations for different vaccination strategies. Protein-protein interactions have also been inferred utilizing structural approaches, such as molecular docking and molecular dynamics simulation. Similar workflows and technologies were employed to unveil novel A. baumannii drug targets, with a similar trend in the increasing influx of in silico techniques. This review integrates the latest knowledge on the development of A. baumannii vaccines while highlighting the in silico methods as the future of such exploratory research. 
In parallel, we also briefly summarize recent advancements in A. baumannii drug target research.}, } @article {pmid35714801, year = {2022}, author = {Nanjani, S and Soni, R and Paul, D and Keharia, H}, title = {Genome analysis uncovers the prolific antagonistic and plant growth-promoting potential of endophyte Bacillus velezensis K1.}, journal = {Gene}, volume = {836}, number = {}, pages = {146671}, doi = {10.1016/j.gene.2022.146671}, pmid = {35714801}, issn = {1879-0038}, mesh = {*Bacillus/genetics ; Bacillus subtilis/physiology ; Biological Control Agents ; *Endophytes/chemistry/genetics ; Genome, Bacterial ; Plant Diseases/microbiology ; Soil ; }, abstract = {Insights into the application of endophytic bacilli in sustainable agricultural practices have opened up new avenues for the inhibition of soil-borne pathogens and the improvement of plant health. Bacillus subtilis K1, an endophytic bacterium originally isolated from aerial roots of Ficus benghalensis, is a potential biocontrol agent secreting a mixture of surfactins, iturins and fengycins. The current study extends the characterization of this bacterium through genomic and comparative genomics approaches. The sequencing of the bacterial genome on the Illumina MiSeq platform revealed that it possessed a 4,103,502-bp circular chromosome with 45.98% GC content and 4325 predicted protein-coding sequences. Based on phylogenomics and whole-genome average nucleotide identity, the B. subtilis K1 was taxonomically classified as Bacillus velezensis. The formerly evaluated phenotypic traits viz. C-source utilization and lipopeptide-mediated fungal antagonism were correlated to their molecular determinants. The genome also harbored several genes associated with induced systemic resistance and plant growth promotion i.e., phytohormone production, nitrogen assimilation and reduction, siderophore production, phosphate solubilization, biofilm formation, swarming motility, acetoin and butanediol synthesis. 
The production of antifungal volatile organic compounds and plant growth promotion was experimentally demonstrated by volatile compound assay and seed germination assay on cumin and groundnut. The isolate also holds great prospects for application as a soil inoculant as indicated by enhancement in the growth of groundnut via in planta pot studies. Bacterial pan-genome analysis based on a comparison of whole genomes with eighteen other Bacillus strains was also conducted. Comparative examination of biosynthetic gene clusters across all genomes indicated that the largest number of gene clusters were harbored by the K1 genome. Based on the findings, we propose K1 as a model for scrutinizing non-ribosomally synthesized peptide synthetase and polyketide synthetase derived molecules.}, } @article {pmid35712352, year = {2022}, author = {Posada-Reyes, AB and Balderas-Martínez, YI and Ávila-Ríos, S and Vinuesa, P and Fonseca-Coronado, S}, title = {An Epistatic Network Describes oppA and glgB as Relevant Genes for Mycobacterium tuberculosis.}, journal = {Frontiers in molecular biosciences}, volume = {9}, number = {}, pages = {856212}, pmid = {35712352}, issn = {2296-889X}, abstract = {Mycobacterium tuberculosis is an acid-fast bacterium that causes tuberculosis worldwide. The role of epistatic interactions among different loci of the M. tuberculosis genome under selective pressure may be crucial for understanding the disease and the molecular basis of antibiotic resistance acquisition. Here, we analyzed polymorphic loci interactions by applying a model-free method for epistasis detection, SpydrPick, on a pan-genome-wide alignment created from a set of 254 complete reference genomes. By means of the analysis of an epistatic network created with the detected epistatic interactions, we found that glgB (α-1,4-glucan branching enzyme) and oppA (oligopeptide-binding protein) are putative targets of co-selection in M. tuberculosis as they were associated in the network with M. 
tuberculosis genes related to virulence, pathogenesis, transport system modulators of the immune response, and antibiotic resistance. In addition, our work unveiled potential pharmacological applications for genotypic antibiotic resistance inherent to the mutations of glgB and oppA as they epistatically interact with fprA and embC, two genes recently included as antibiotic-resistant genes in the catalog of the World Health Organization. Our findings showed that this approach allows the identification of relevant epistatic interactions that may lead to a better understanding of M. tuberculosis by deciphering the complex interactions of molecules involved in its metabolism, virulence, and pathogenesis and that may be applied to different bacterial populations.}, } @article {pmid35710371, year = {2022}, author = {Tantoso, E and Eisenhaber, B and Kirsch, M and Shitov, V and Zhao, Z and Eisenhaber, F}, title = {To kill or to be killed: pangenome analysis of Escherichia coli strains reveals a tailocin specific for pandemic ST131.}, journal = {BMC biology}, volume = {20}, number = {1}, pages = {146}, pmid = {35710371}, issn = {1741-7007}, mesh = {Escherichia coli/genetics/metabolism ; *Escherichia coli Infections/epidemiology/microbiology ; *Escherichia coli Proteins/genetics ; Genome, Bacterial ; Humans ; Pandemics ; Phylogeny ; Prophages ; }, abstract = {BACKGROUND: Escherichia coli (E. coli) has been one of the most studied model organisms in the history of life sciences. Initially thought just to be commensal bacteria, E. coli has shown wide phenotypic diversity including pathogenic isolates with great relevance to public health. Though pangenome analysis has been attempted several times, there is no systematic functional characterization of the E. coli subgroups according to the gene profile.

RESULTS: Systematically scanning for optimal parametrization, we have built the E. coli pangenome from 1324 complete genomes. The pangenome size is estimated to be ~25,000 gene families (GFs). Whereas the core genome diminishes as more genomes are added, the softcore genome (≥95% of strains) is stable with ~3000 GFs regardless of the total number of genomes. Apparently, the softcore genome (with a 92% or 95% generation threshold) can define the genome of a bacterial species listing the critically relevant, evolutionarily most conserved or important classes of GFs. Unsupervised clustering of common E. coli sequence types using the presence/absence GF matrix reveals distinct characteristics of E. coli phylogroups B1, B2, and E. We highlight the bi-lineage nature of B1, the variation of the secretion and of the iron acquisition systems in ST11 (E), and the incorporation of a highly conserved prophage into the genome of ST131 (B2). The tail structure of the prophage is evolutionarily related to R2-pyocin (a tailocin) from Pseudomonas aeruginosa PAO1. We hypothesize that this molecular machinery is highly likely to play an important role in protecting its own colonies; thus, contributing towards the rapid rise of pandemic E. coli ST131.

CONCLUSIONS: This study has explored the optimized pangenome development in E. coli. We provide complete GF lists and the pangenome matrix as supplementary data for further studies. We identified biological characteristics of different E. coli subtypes, specifically for phylogroups B1, B2, and E. We found an operon-like genome region coding for a tailocin specific for ST131 strains. The latter is a potential killer weapon providing pandemic E. coli ST131 with an advantage in inter-bacterial competition and, suggestively, explains their dominance as human pathogen among E. coli strains.}, } @article {pmid35708861, year = {2022}, author = {Mohanty, JK and Jha, UC and Dixit, GP and Parida, SK}, title = {Harnessing the hidden allelic diversity of wild Cicer to accelerate genomics-assisted chickpea crop improvement.}, journal = {Molecular biology reports}, volume = {49}, number = {6}, pages = {5697-5715}, pmid = {35708861}, issn = {1573-4978}, mesh = {Alleles ; *Cicer/genetics ; Genome, Plant/genetics ; Genomics ; Plant Breeding ; }, abstract = {Chickpea, commonly called Bengal gram or Garbanzo bean, faces a productivity crisis around the globe due to numerous biotic and abiotic stresses. The eroded genetic base of the cultivated Cicer gene pool is becoming a significant bottleneck in developing stress-resilient chickpea cultivars. In this scenario, the crop wild relatives (CWR) of chickpea, with the useful genomic wealth of their wild adaptation, give a ray of hope to improve the genetic background of the cultivated Cicer gene pool. To extrapolate these unearthed genomic diversities of wild, we require a thorough understanding of the pre-historic domestication episodes that are changing their shape with the expansion of the available scientific evidence. 
Keeping aforesaid in view, the current review article provides a glimpsed overview on several efforts done so far to reveal the mysterious origin and evolution of the Cicer gene pool, along with the constraints in their utilization for chickpea crop improvement. It encapsulates various stress-resilient CWR of chickpea and their use in several pre-breeding programs to develop numerous breeding populations for crop genetic enhancement. Further, this review will recapitulate the significant contributions of structural, functional and comparative genomics, pan-genomics and diverse genomics-assisted breeding strategy in dissecting the untapped trait-specific allelic/gene diversity and domestication pattern behind the CWR of chickpea, along with their potential and promises. We expect the newly explored genetic variations may be used in the breeding programs for re-wilding the cultigens' genomic background to open a new avenue for genetic gain and crop improvement capacity of chickpea.}, } @article {pmid35705841, year = {2022}, author = {Kong, X and Wang, H and Guo, G and Li, P and Tong, P and Liu, M and Ma, X and Dong, C and Li, Y and Zhang, H and Zhang, W}, title = {Duck sewage source coliphage P762 can lyse STEC and APEC.}, journal = {Virus genes}, volume = {58}, number = {5}, pages = {436-447}, pmid = {35705841}, issn = {1572-994X}, support = {U1803109//Innovative Research Group Project of the National Natural Science Foundation of China/ ; BE2017654//Collaborative Innovation Center for Modern Science and Technology and Industrial Development of Jiangxi Traditional Medicine/ ; gxyq2019201//The project of supporting outstanding young talents in universities of anhui province/ ; wzykjtd202002//Wuhu Institute of Technology level science and technology team/ ; 2020jxtd282//Animal epidemic prevention and quarantine teaching team of Anhui quality engineering project/ ; }, mesh = {Agar ; Animals ; Anti-Bacterial Agents ; *Bacteriophages/genetics ; Coliphages/genetics ; 
Ducks ; *Escherichia coli Infections/microbiology/prevention & control ; Sewage ; *Shiga-Toxigenic Escherichia coli/genetics ; }, abstract = {Multiple pathogenic types or serotypes restrict treatment for colibacillosis. In addition, rising antibiotic resistance has heightened public awareness to prevent and control pathogenic Escherichia coli. The bacteriophage is a viable technique to treat colibacillosis as an alternative to antibiotics. P762, a coliphage isolated from duck farm sewage, was demonstrated to lyse Shiga toxin-producing Escherichia coli serotypes O157 and non-O157 (17/39), avian pathogenic E. coli covering serotypes O78, O83, and O9 (5/19), and other pathogenic Escherichia coli (5/17). Additional fundamental biological characteristics analysis revealed that P762 is stable at pH 3 ~ 11 and temperature between 4 °C and 60 °C, and its optimum multiplicity of infection (MOI) is 0.1. The one-step curve of P762 exhibited three bursts of growth stage: two rapid and one slow stage. Furthermore, the first rapid burst size is 80 CFU/PFU, the burst size of the slow stage is 10 CFU/PFU, and the second rapid burst size is about 990 CFU/PFU. In addition, P762 can form a "halo" on a double agar plate, implying that the phage secretes depolymerase. With 95.14% identity and 90% query coverage, genome sequence analysis revealed that P762 is most closely related to Escherichia phage DY1, which belongs to the genus Kayfunavirus. After screening using RAST and VFDB, no virulence factors were discovered in P762. In vitro antibacterial tests revealed that P762 has high bactericidal activity in lettuce leaves contaminated with STEC. 
In conclusion, phage P762 might be employed in the future to prevent and control pathogenic Escherichia coli.}, } @article {pmid35699368, year = {2022}, author = {De Oliveira, AL and Srivastava, A and Espada-Hinojosa, S and Bright, M}, title = {The complete and closed genome of the facultative generalist Candidatus Endoriftia persephone from deep-sea hydrothermal vents.}, journal = {Molecular ecology resources}, volume = {22}, number = {8}, pages = {3106-3123}, pmid = {35699368}, issn = {1755-0998}, support = {31543-B29//Austrian Science Fund/ ; }, mesh = {DNA Restriction-Modification Enzymes/genetics ; Epigenesis, Genetic ; *Hydrothermal Vents ; Sulfur ; Symbiosis/genetics ; Transposases/genetics ; }, abstract = {The mutualistic interactions between Riftia pachyptila and its endosymbiont Candidatus Endoriftia persephone (short Endoriftia) have been extensively researched. However, the closed Endoriftia genome is still lacking. Here, by employing single-molecule real-time sequencing we present the closed chromosomal sequence of Endoriftia. In contrast to theoretical predictions of enlarged and mobile genetic element-rich genomes related to facultative endosymbionts, the closed Endoriftia genome is streamlined with fewer than expected coding sequence regions, insertion-, prophage-sequences and transposase-coding sequences. Automated and manually curated functional analyses indicated that Endoriftia is more versatile regarding sulphur metabolism than previously reported. We identified the presence of two identical rRNA operons and two long CRISPR regions in the closed genome. Additionally, pangenome analyses revealed the presence of three types of secretion systems (II, IV and VI) in the different Endoriftia populations indicating lineage-specific adaptations. 
The in-depth mobilome characterization identified the presence of shared genomic islands in the different Endoriftia drafts and in the closed genome, suggesting that the acquisition of foreign DNA predates the geographical dispersal of the different endosymbiont populations. Finally, we found no evidence of epigenetic regulation in Endoriftia, as revealed by gene screenings and absence of methylated modified base motifs in the genome. As a matter of fact, the restriction-modification system seems to be dysfunctional in Endoriftia, pointing to a higher importance of molecular memory-based immunity against phages via spacer incorporation into the CRISPR system. The Endoriftia genome is the first closed tubeworm endosymbiont genome to date and will be valuable for future gene-oriented and evolutionary comparative studies.}, } @article {pmid35695507, year = {2022}, author = {Mustapha, MM and Srinivasa, VR and Griffith, MP and Cho, ST and Evans, DR and Waggle, K and Ezeonwuka, C and Snyder, DJ and Marsh, JW and Harrison, LH and Cooper, VS and Van Tyne, D}, title = {Genomic Diversity of Hospital-Acquired Infections Revealed through Prospective Whole-Genome Sequencing-Based Surveillance.}, journal = {mSystems}, volume = {7}, number = {3}, pages = {e0138421}, pmid = {35695507}, issn = {2379-5077}, support = {U01 AI124302/AI/NIAID NIH HHS/United States ; KL2 TR001856/TR/NCATS NIH HHS/United States ; R21AI109459//HHS | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; R01 AI127472/AI/NIAID NIH HHS/United States ; }, mesh = {Humans ; *Genome, Bacterial/genetics ; *Genomics ; Whole Genome Sequencing ; Anti-Bacterial Agents ; Hospitals ; }, abstract = {Healthcare-associated infections (HAIs) cause mortality, morbidity, and waste of health care resources. HAIs are also an important driver of antimicrobial resistance, which is increasing around the world. 
Beginning in November 2016, we instituted an initiative to detect outbreaks of HAIs using prospective whole-genome sequencing-based surveillance of bacterial pathogens collected from hospitalized patients. Here, we describe the diversity of bacteria sampled from hospitalized patients at a single center, as revealed through systematic analysis of bacterial isolate genomes. We sequenced the genomes of 3,004 bacterial isolates from hospitalized patients collected over a 25-month period. We identified bacteria belonging to 97 distinct species, which were distributed among 14 groups of related species. Within these groups, isolates could be distinguished from one another by both average nucleotide identity (ANI) and principal-component analysis of accessory genes (PCA-A). Core genome genetic distances and rates of evolution varied among species, which has practical implications for defining shared ancestry during outbreaks and for our broader understanding of the origins of bacterial strains and species. Finally, antimicrobial resistance genes and putative mobile genetic elements were frequently observed, and our systematic analysis revealed patterns of occurrence across the different species sampled from our hospital. Overall, this study shows how understanding the population structure of diverse pathogens circulating in a single health care setting can improve the discriminatory power of genomic epidemiology studies and can help define the processes leading to strain and species differentiation. IMPORTANCE Hospitalized patients are at increased risk of becoming infected with antibiotic-resistant organisms. We used whole-genome sequencing to survey and compare over 3,000 clinical bacterial isolates collected from hospitalized patients at a large medical center over a 2-year period. We identified nearly 100 different bacterial species, which we divided into 14 different groups of related species. 
When we examined how genetic relatedness differed between species, we found that different species were likely evolving at different rates within our hospital. This is significant because the identification of bacterial outbreaks in the hospital currently relies on genetic similarity cutoffs, which are often applied uniformly across organisms. Finally, we found that antibiotic resistance genes and mobile genetic elements were abundant and were shared among the bacterial isolates we sampled. Overall, this study provides an in-depth view of the genomic diversity and evolutionary processes of bacteria sampled from hospitalized patients, as well as genetic similarity estimates that can inform hospital outbreak detection and prevention efforts.}, } @article {pmid35695431, year = {2022}, author = {Hwang, Y and Girguis, PR}, title = {Differentiated Evolutionary Strategies of Genetic Diversification in Atlantic and Pacific Thaumarchaeal Populations.}, journal = {mSystems}, volume = {7}, number = {3}, pages = {e0147721}, pmid = {35695431}, issn = {2379-5077}, mesh = {*Ammonia ; Phylogeny ; *Ecosystem ; Oceans and Seas ; Archaea/genetics ; }, abstract = {Some marine microbes are seemingly "ubiquitous," thriving across a wide range of environmental conditions. While the increased depth in metagenomic sequencing has led to a growing body of research on within-population heterogeneity in environmental microbial populations, there have been fewer systematic comparisons and characterizations of population-level genetic diversity over broader expanses of time and space. Here, we investigated the factors that govern the diversification of ubiquitous microbial taxa found within and between ocean basins. 
Specifically, we use mapped metagenomic paired reads to examine the genetic diversity of ammonia-oxidizing archaeal ("Candidatus Nitrosopelagicus brevis") populations in the Pacific (Hawaii Ocean Time-series [HOT]) and Atlantic (Bermuda Atlantic Time Series [BATS]) Oceans sampled over 2 years. We observed higher nucleotide diversity in "Ca. N. brevis" at HOT, driven by a higher rate of homologous recombination. In contrast, "Ca. N. brevis" at BATS featured a more open pangenome with a larger set of genes that were specific to BATS, suggesting a history of dynamic gene gain and loss events. Furthermore, we identified highly differentiated genes that were regulatory in function, some of which exhibited evidence of recent selective sweeps. These findings indicate that different modes of genetic diversification likely incur specific adaptive advantages depending on the selective pressures that they are under. Within-population diversity generated by the environment-specific strategies of genetic diversification is likely key to the ecological success of "Ca. N. brevis." IMPORTANCE Ammonia-oxidizing archaea (AOA) are one of the most abundant chemolithoautotrophic microbes in the marine water column and are major contributors to global carbon and nitrogen cycling. Despite their ecological importance and geographical pervasiveness, there have been limited systematic comparisons and characterizations of their population-level genetic diversity over time and space. Here, we use metagenomic time series from two ocean observatories to address the fundamental questions of how abiotic and biotic factors shape the population-level genetic diversity and how natural microbial populations adapt across diverse habitats. We show that the marine AOA "Candidatus Nitrosopelagicus brevis" in different ocean basins exhibits distinct modes of genetic diversification in response to their selective regimes shaped by nutrient availability and patterns of environmental fluctuations. 
Our findings specific to "Ca. N. brevis" have broader implications, particularly in understanding the population-level responses to the changing climate and predicting its impact on biogeochemical cycles.}, } @article {pmid35690455, year = {2022}, author = {Palma, F and Radomski, N and Guérin, A and Sévellec, Y and Félix, B and Bridier, A and Soumet, C and Roussel, S and Guillier, L}, title = {Genomic elements located in the accessory repertoire drive the adaptation to biocides in Listeria monocytogenes strains from different ecological niches.}, journal = {Food microbiology}, volume = {106}, number = {}, pages = {103757}, doi = {10.1016/j.fm.2021.103757}, pmid = {35690455}, issn = {1095-9998}, mesh = {Animals ; Benzalkonium Compounds/pharmacology ; Chlorides ; *Disinfectants/pharmacology ; Drug Resistance, Bacterial/genetics ; Ecosystem ; Genomics ; *Listeria monocytogenes ; }, abstract = {In response to the massive use of biocides for controlling Listeria monocytogenes (hereafter Lm) contaminations along the food chain, strains showing biocide tolerance emerged. Here, accessory genomic elements were associated with biocide tolerance through pangenome-wide associations performed on 197 Lm strains from different lineages, ecological, geographical and temporal origins. Mobile elements, including prophage-related loci, the Tn6188_qacH transposon and pLMST6_emrC plasmid, were widespread across lineage I and II food strains and associated with tolerance to benzalkonium-chloride (BC), a quaternary ammonium compound (QAC) widely used in food processing. The pLMST6_emrC was also associated with tolerance to another QAC, the didecyldimethylammonium-chloride, displaying a pleiotropic effect. While no associations were detected for chemically reactive biocides (alcohols and chlorines), genes encoding for cell-surface proteins were associated with BC or polymeric biguanide tolerance. The latter was restricted to lineage I strains from animal and the environment. 
In conclusion, different genetic markers, with polygenic nature or not, appear to have driven the Lm adaptation to biocide, especially in food strains but also from animal and the environment. These markers could aid to monitor and predict the spread of biocide tolerant Lm genotypes across different ecological niches, finally reducing the risk of such strains in food industrial settings.}, } @article {pmid35685364, year = {2022}, author = {Quan, C and Lu, H and Lu, Y and Zhou, G}, title = {Population-scale genotyping of structural variation in the era of long-read sequencing.}, journal = {Computational and structural biotechnology journal}, volume = {20}, number = {}, pages = {2639-2647}, pmid = {35685364}, issn = {2001-0370}, abstract = {Population-scale studies of structural variation (SV) are growing rapidly worldwide with the development of long-read sequencing technology, yielding a considerable number of novel SVs and complete gap-closed genome assemblies. Herein, we highlight recent studies using a hybrid sequencing strategy and present the challenges toward large-scale genotyping for SVs due to the reference bias. Genotyping SVs at a population scale remains challenging, which severely impacts genotype-based population genetic studies or genome-wide association studies of complex diseases. We summarize academic efforts to improve genotype quality through linear or graph representations of reference and alternative alleles. Graph-based genotypers capable of integrating diverse genetic information are effectively applied to large and diverse cohorts, contributing to unbiased downstream analysis. 
Meanwhile, there is still an urgent need in this field for efficient tools to construct complex graphs and perform sequence-to-graph alignments.}, } @article {pmid35684146, year = {2022}, author = {Lin, G and Liu, Q and Wang, L and Li, H and Zhao, J and Zhang, H and Wang, G and Chen, W}, title = {The Comparative Analysis of Genomic Diversity and Genes Involved in Carbohydrate Metabolism of Eighty-Eight Bifidobacterium pseudocatenulatum Isolates from Different Niches of China.}, journal = {Nutrients}, volume = {14}, number = {11}, pages = {}, pmid = {35684146}, issn = {2072-6643}, support = {31972052//National Natural Science Foundation of China/ ; 32021005//National Natural Science Foundation of China/ ; 31820103010//National Natural Science Foundation of China/ ; JUSRP22006//Fundamental Research Funds for the Central Universities/ ; JUSRP51501//Fundamental Research Funds for the Central Universities/ ; }, mesh = {Animals ; *Bifidobacterium pseudocatenulatum/metabolism ; Carbohydrate Metabolism/genetics ; Carbohydrates ; Cattle ; Female ; *Gastrointestinal Microbiome/genetics ; Genomics ; Glycoside Hydrolases/genetics/metabolism ; Humans ; Mice ; }, abstract = {Eighty-eight Bifidobacterium pseudocatenulatum strains, which were isolated from human, chicken and cow fecal samples from different niches of China, were compared genomically in this study to evaluate their diversity. It was found that B. pseudocatenulatum displayed a closed pan-genome, including abundant glycoside hydrolase families of the carbohydrate active enzyme (CAZy). A total of 30 kinds of glycoside hydrolases (GHs), 14 kinds of glycosyl transferases (GTs), 13 kinds of carbohydrate-binding modules (CBMs), 6 kinds of carbohydrate-esterases (CEs), and 2 kinds of auxiliary activities (AAs) gene families were identified across the genomes of the 88 B. pseudocatenulatum strains. 
Specifically, this showed that significant differences were also present in the number of 10 carbohydrate-active enzyme gene families (GT51, GH13_32, GH26, GH42, GH121, GH3, AA3, CBM46, CE2, and CE6) among the strains derived from the hosts of different age groups, particularly between strains from infants and those from other human age groups. Twelve different individuals of B. pseudocatenulatum from four main clusters were selected for further study to reveal the genetic diversity of carbohydrate metabolism-related genes within the same phylogenetics. The animal experiment showed that 3 weeks of oral administration and 1 week after cessation of administration of these strains did not markedly alter the serum routine inflammatory indicators in mice. Furthermore, the administration of these strains did not significantly cause adverse changes in the gut microbiota, as indicated by the α- and β-diversity indexes, relative to the control group (normal diet). Beyond that, FAHBZ9L5 significantly increased the abundance of B. pseudocatenulatum after 3 weeks and significantly increased the abundance of acetic acid and butyric acid in the host's intestinal tract 3 and 4 weeks after the first administration, respectively, compared with the control group. Corresponding to this, comparative genomic analyses of 12 B. pseudocatenulatum suggest that FAHBZ9L5-specific genes were rich in ABC transporters and carbohydrate esterase. Combining the results of comparative genomics analyses and animal experiment, it is suggested that the strains containing certain gene clusters contribute to another competitive growth advantage of B. 
pseudocatenulatum, which facilitates its intestinal carbohydrate metabolism in a host.}, } @article {pmid35676474, year = {2022}, author = {Zhou, Y and Zhang, Z and Bao, Z and Li, H and Lyu, Y and Zan, Y and Wu, Y and Cheng, L and Fang, Y and Wu, K and Zhang, J and Lyu, H and Lin, T and Gao, Q and Saha, S and Mueller, L and Fei, Z and Städler, T and Xu, S and Zhang, Z and Speed, D and Huang, S}, title = {Graph pangenome captures missing heritability and empowers tomato breeding.}, journal = {Nature}, volume = {606}, number = {7914}, pages = {527--534}, pmid = {35676474}, issn = {1476-4687}, mesh = {Alleles ; Crops, Agricultural/genetics ; *Genetic Variation ; *Genome, Plant/genetics ; *Genome-Wide Association Study ; Linkage Disequilibrium ; *Solanum lycopersicum/genetics/metabolism ; *Plant Breeding ; }, abstract = {Missing heritability in genome-wide association studies defines a major problem in genetic analyses of complex biological traits[1,2]. The solution to this problem is to identify all causal genetic variants and to measure their individual contributions[3,4]. Here we report a graph pangenome of tomato constructed by precisely cataloguing more than 19 million variants from 838 genomes, including 32 new reference-level genome assemblies. This graph pangenome was used for genome-wide association study analyses and heritability estimation of 20,323 gene-expression and metabolite traits. The average estimated trait heritability is 0.41 compared with 0.33 when using the single linear reference genome. This 24% increase in estimated heritability is largely due to resolving incomplete linkage disequilibrium through the inclusion of additional causal structural variants identified using the graph pangenome. 
Moreover, by resolving allelic and locus heterogeneity, structural variants improve the power to identify genetic factors underlying agronomically important traits leading to, for example, the identification of two new genes potentially contributing to soluble solid content. The newly identified structural variants will facilitate genetic improvement of tomato through both marker-assisted selection and genomic selection. Our study advances the understanding of the heritability of complex traits and demonstrates the power of the graph pangenome in crop breeding.}, } @article {pmid35672470, year = {2022}, author = {Kim, E and Yang, SM and Kim, IS and Kim, HY}, title = {Identification of novel molecular targets for {Weissella} species-specific real-time {PCR} based on pangenome analysis.}, journal = {Applied microbiology and biotechnology}, volume = {106}, number = {11}, pages = {4157--4168}, pmid = {35672470}, issn = {1432-0614}, support = {PJ01662001//Rural Development Administration/ ; }, mesh = {DNA Primers/genetics ; Humans ; RNA, Ribosomal, 16S/genetics ; Real-Time Polymerase Chain Reaction ; Species Specificity ; *Weissella/genetics ; }, abstract = {Some Weissella species are used in probiotic products because of their beneficial effects in humans, whereas some species are considered as opportunistic pathogens that cause infections in humans. Therefore, an accurate and rapid identification of Weissella species is essential to control pathogenic Weissella species or isolate new functional strains with probiotic effects from their habitat. The objective of our study was to extract novel molecular targets using pangenome analysis for the identification of major Weissella species present in food. With 50 genomes representing 11 Weissella species, novel molecular targets were mined based on their 100% presence in the respective strains of the target species and absence in the strains of non-target bacteria. 
Primers based on molecular targets showed positive results for the corresponding species, whereas 79 non-target strains showed negative results. Standard curves revealed good linearity in the range of 10[3]-10[8] colony-forming units per reaction. Our method was successfully applied to 74 Weissella strains isolated from food samples to demonstrate that the molecular targets provided a viable alternative to the 16S rRNA sequence. Furthermore, it was possible to identify and quantify Weissella communities in fermented foods. These results demonstrate that our method can be used for effective and accurate screening for the presence of Weissella species in foods. KEY POINTS: • This is first study to mine novel targets for differentiating 11 Weissella species. • The novel targets showed higher resolution than the 16S rRNA gene sequence. • The PCR method effectively detected Weissella species with opposing properties.}, } @article {pmid35668795, year = {2022}, author = {Sun, Y and Zhang, PT and Kou, DR and Han, YC and Fang, JC and Ni, JP and Jiang, B and Wang, X and Zhang, YJ and Wang, W and Kong, XD}, title = {Terpene Synthases in Rice Pan-Genome and Their Responses to Chilo suppressalis Larvae Infesting.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {905982}, pmid = {35668795}, issn = {1664-462X}, abstract = {Terpene synthase (TPS) catalyzes the synthesis of terpenes and plays an important role in plant defense. This study identified 45 OsTPS genes (32 core genes and 13 variable genes) based on the high-quality rice gene-based pan-genome. This indicates limitations in OsTPS gene studies based on a single reference genome. In the present study, through collinearity between multiple rice genomes, one OsTPS gene absent in the reference (Nipponbare) genome was found and two TPS genes in the reference genome were found to have atypical structures, which would have been ignored in single genome analysis. 
OsTPS genes were divided into five groups and TPS-b was lost according to the phylogenetic tree. OsTPSs in TPS-c and TPS-g were all core genes indicating these two groups were stable during domestication. In addition, through the analysis of transcriptome data, some structural variations were found to affect the expression of OsTPS genes. Through the Ka/Ks calculation of OsTPS genes, we found that different OsTPS genes were under different selection pressure during domestication; for example, OsTPS22 and OsTPS29 experienced stronger positive selection than the other OsTPS genes. After Chilo suppressalis larvae infesting, 25 differentially expressed OsTPS genes were identified, which are involved in the diterpene phytoalexins precursors biosynthesis and ent-kaurene biosynthesis pathways. Overall, the present study conducted a bioinformatics analysis of OsTPS genes using a high-quality rice pan-genome, which provided a basis for further study of OsTPS genes.}, } @article {pmid35664542, year = {2022}, author = {Orlando, F and Romanel, A and Trujillo, B and Sigouros, M and Wetterskog, D and Quaini, O and Leone, G and Xiang, JZ and Wingate, A and Tagawa, S and Jayaram, A and Linch, M and Jamal-Hanjani, M and Swanton, C and Rubin, MA and Wyatt, AW and Beltran, H and Attard, G and Demichelis, F}, title = {Allele-informed copy number evaluation of plasma {DNA} samples from metastatic prostate cancer patients: the {PCF\_SELECT} consortium assay.}, journal = {NAR cancer}, volume = {4}, number = {2}, pages = {zcac016}, pmid = {35664542}, issn = {2632-8674}, support = {R37 CA241486/CA/NCI NIH HHS/United States ; }, abstract = {Sequencing of cell-free DNA (cfDNA) in cancer patients' plasma offers a minimally-invasive solution to detect tumor cell genomic alterations to aid real-time clinical decision-making. The reliability of copy number detection decreases at lower cfDNA tumor fractions, limiting utility at earlier stages of the disease. 
To test a novel strategy for detection of allelic imbalance, we developed a prostate cancer bespoke assay, PCF_SELECT, that includes an innovative sequencing panel covering ∼25 000 high minor allele frequency SNPs and tailored analytical solutions to enable allele-informed evaluation. First, we assessed it on plasma samples from 50 advanced prostate cancer patients. We then confirmed improved detection of genomic alterations in samples with <10% tumor fractions when compared against an independent assay. Finally, we applied PCF_SELECT to serial plasma samples intensively collected from three patients previously characterized as harboring alterations involving DNA repair genes and consequently offered PARP inhibition. We identified more extensive pan-genome allelic imbalance than previously recognized in prostate cancer. We confirmed high sensitivity detection of BRCA2 allelic imbalance with decreasing tumor fractions resultant from treatment and identified complex ATM genomic states that may be incongruent with protein losses. Overall, we present a framework for sensitive detection of allele-specific copy number changes in cfDNA.}, } @article {pmid35663888, year = {2022}, author = {Yero, D and Jia, B and Gao, F}, title = {Editorial: Insights in Evolutionary and Genomic Microbiology: 2021.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {915593}, pmid = {35663888}, issn = {1664-302X}, } @article {pmid35663880, year = {2022}, author = {Surachat, K and Kantachote, D and Wonglapsuwan, M and Chukamnerd, A and Deachamag, P and Mittraparp-Arthorn, P and Jeenkeawpiam, K}, title = {Complete Genome Sequence of Weissella cibaria NH9449 and Comprehensive Comparative-Genomic Analysis: Genomic Diversity and Versatility Trait Revealed.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {826683}, pmid = {35663880}, issn = {1664-302X}, abstract = {Lactic acid bacteria (LAB) in the genus Weissella spp. 
contain traits in their genome that confer versatility. In particular, Weissella cibaria encodes several beneficial genes that are useful in biotechnological applications. The complete genome of W. cibaria NH9449 was sequenced and an in silico comparative analysis was performed to gain insight into the genomic diversity among members of the genus Weissella. A total of 219 Weissella genomes were used in a bioinformatics analysis of pan-genomes, phylogenetics, self-defense mechanisms, virulence factors, antimicrobial resistance, and carbohydrate-active enzymes. These investigations showed that the strain NH9449 encodes several restriction-modification-related genes and a CRISPR-Cas region in its genome. The identification of carbohydrate-active enzyme-encoding genes indicated that this strain could be beneficial in biotechnological applications. The comparative genomic analysis reveals the very high genomic diversity in this genus, and some marked differences in genetic variation and genes among Weissella species. The calculated average amino acid identity (AAI) and phylogenetic analysis of core and accessory genes shows the possible existence of three new species in this genus. These new genomic insights into Weissella species and their biological functions could be useful in the food industry and other applications.}, } @article {pmid35663029, year = {2022}, author = {Schulz, T and Wittler, R and Stoye, J}, title = {Sequence-based pangenomic core detection.}, journal = {iScience}, volume = {25}, number = {6}, pages = {104413}, pmid = {35663029}, issn = {2589-0042}, abstract = {One of the most basic kinds of analysis to be performed on a pangenome is the detection of its core, i.e., the information shared among all members. Pangenomic core detection is classically done on the gene level and many tools focus exclusively on core detection in prokaryotes. Here, we present a new method for sequence-based pangenomic core detection. 
Our model generalizes from a strict core definition allowing us to flexibly determine suitable core properties depending on the research question and the dataset under consideration. We propose an algorithm based on a colored de Bruijn graph that runs in linear time with respect to the number of k-mers in the graph. An implementation of our method is called Corer. Because of the usage of a colored de Bruijn graph, it works alignment-free, is provided with a small memory footprint, and accepts as input assembled genomes as well as sequencing reads.}, } @article {pmid35661553, year = {2022}, author = {Carvalho, GG and Calarga, AP and Zorgi, NE and Astudillo-Trujillo, CA and Gontijo, MTP and Brocchi, M and Giorgio, S and Kabuki, DY}, title = {Virulence and DNA sequence analysis of Cronobacter spp. isolated from infant cereals.}, journal = {International journal of food microbiology}, volume = {376}, number = {}, pages = {109745}, doi = {10.1016/j.ijfoodmicro.2022.109745}, pmid = {35661553}, issn = {1879-3460}, mesh = {Aged ; *Cronobacter/genetics ; *Cronobacter sakazakii ; Edible Grain ; Food Microbiology ; Humans ; Infant ; Infant Formula ; Infant, Newborn ; Sequence Analysis, DNA ; Virulence/genetics ; }, abstract = {Cronobacter spp. is an opportunistic pathogen that causes severe infections, affecting newborns and infants, and is also an emerging cause of hospital-acquired infection in elderly populations. These infections are mainly associated with the consumption of infant formulas, even though these bacteria have been isolated from other foods as well. Cronobacter spp. invades epithelial cells and escapes the immune response mechanisms, multiplying inside macrophages. However, the pathogenesis and virulence factors of these bacteria have not been fully elucidated and need to be further studied. Therefore, this study aimed to evaluate the ability of Cronobacter spp. 
strains isolated from infant cereals to invade and survive within macrophages, investigate the virulence phenotype using the Galleria mellonella model, and identify possible genes involved in bacterial pathogenesis through pan-genome analysis. All the isolates were able to invade macrophages and the survival of bacteria decreased over a 72 h period, with bacterial cell counts reaching up to 10[6] CFU/ml. Cronobacter sakazakii isolate 112 exhibited a similar mortality rate (40-70%) to the ATCC BAA 894 strain (Cronobacter sakazakii) in G. mellonella assay. In addition, some unique virulence genes (isolate 7, ada_2, tcmA_1, acrB_3; isolate 78, ampC_2, rihC_1 and isolate 112, fimH, ylpA, gtrA) were identified within isolates with the invasive profile in the in vivo and in vitro assays. Furthermore, isolates from different species were grouped into seven distinct clusters in the pan-genome analysis. The most virulent isolates (7, 78, and 112) were grouped in distinct subclusters in the cladogram. This work revealed potential Cronobacter spp. 
pathogenic strains recovered from infant cereals.}, } @article {pmid35657601, year = {2022}, author = {Labarge, B and Hennessy, M and Zhang, L and Goldrich, D and Chartrand, S and Purnell, C and Wright, S and Goldenberg, D and Broach, JR}, title = {Human Papillomavirus Integration Strictly Correlates with Global Genome Instability in Head and Neck Cancer.}, journal = {Molecular cancer research : MCR}, volume = {20}, number = {9}, pages = {1420--1428}, pmid = {35657601}, issn = {1557-3125}, mesh = {*Alphapapillomavirus/genetics ; *Carcinoma, Squamous Cell/genetics ; DNA, Viral/genetics ; Genomic Instability ; *Head and Neck Neoplasms/genetics ; Humans ; *Oropharyngeal Neoplasms/genetics ; Papillomaviridae/genetics ; *Papillomavirus Infections/genetics ; Virus Integration/genetics ; }, abstract = {UNLABELLED: Human papillomavirus (HPV)-positive head and neck cancers, predominantly oropharyngeal squamous cell carcinoma (OPSCC), exhibit epidemiologic, clinical, and molecular characteristics distinct from those OPSCCs lacking HPV. We applied a combination of whole-genome sequencing and optical genome mapping to interrogate the genome structure of HPV-positive OPSCCs. We found that the virus had integrated in the host genome in two thirds of the tumors examined but resided solely extrachromosomally in the other third. Integration of the virus occurred at essentially random sites within the genome. Focal amplification of the virus and the genomic sequences surrounding it often occurred subsequent to integration, with the number of tandem repeats in the chromosome accounting for the increased copy number of the genome sequences flanking the site of integration. In all cases, viral integration correlated with pervasive genome-wide somatic alterations at sites distinct from that of viral integration and comprised multiple insertions, deletions, translocations, inversions, and point mutations. Few or no somatic mutations were present in tumors with only episomal HPV. 
Our data could be interpreted by positing that episomal HPV is captured in the host genome following an episode of global genome instability during tumor development. Viral integration correlated with higher grade tumors, which may be explained by the associated extensive mutation of the genome and suggests that HPV integration status may inform prognosis.

IMPLICATIONS: Our results indicate that HPV integration in head and neck cancer correlates with extensive pangenomic structural variation, which may have prognostic implications.}, } @article {pmid35653240, year = {2022}, author = {Boatwright, JL and Sapkota, S and Jin, H and Schnable, JC and Brenton, Z and Boyles, R and Kresovich, S}, title = {{Sorghum Association Panel} whole-genome sequencing establishes cornerstone resource for dissecting genomic diversity.}, journal = {The Plant journal : for cell and molecular biology}, volume = {111}, number = {3}, pages = {888--904}, pmid = {35653240}, issn = {1365-313X}, mesh = {Edible Grain/genetics ; Genome ; Genome-Wide Association Study ; Genomics/methods ; Plant Breeding/methods ; Polymorphism, Single Nucleotide/genetics ; *Sorghum/genetics ; }, abstract = {Association mapping panels represent foundational resources for understanding the genetic basis of phenotypic diversity and serve to advance plant breeding by exploring genetic variation across diverse accessions. We report the whole-genome sequencing (WGS) of 400 sorghum (Sorghum bicolor (L.) Moench) accessions from the Sorghum Association Panel (SAP) at an average coverage of 38× (25-72×), enabling the development of a high-density genomic marker set of 43 983 694 variants including single-nucleotide polymorphisms (approximately 38 million), insertions/deletions (indels) (approximately 5 million), and copy number variants (CNVs) (approximately 170 000). We observe slightly more deletions among indels and a much higher prevalence of deletions among CNVs compared to insertions. This new marker set enabled the identification of several novel putative genomic associations for plant height and tannin content, which were not identified when using previous lower-density marker sets. WGS identified and scored variants in 5-kb bins where available genotyping-by-sequencing (GBS) data captured no variants, with half of all bins in the genome falling into this category. 
The predictive ability of genomic best unbiased linear predictor (GBLUP) models was increased by an average of 30% by using WGS markers rather than GBS markers. We identified 18 selection peaks across subpopulations that formed due to evolutionary divergence during domestication, and we found six Fst peaks resulting from comparisons between converted lines and breeding lines within the SAP that were distinct from the peaks associated with historic selection. This population has served and continues to serve as a significant public resource for sorghum research and demonstrates the value of improving upon existing genomic resources.}, } @article {pmid35647330, year = {2022}, author = {Mohite, OS and Lloyd, CJ and Monk, JM and Weber, T and Palsson, BO}, title = {Pangenome analysis of {Enterobacteria} reveals richness of secondary metabolite gene clusters and their associated gene sets.}, journal = {Synthetic and systems biotechnology}, volume = {7}, number = {3}, pages = {900--910}, pmid = {35647330}, issn = {2405-805X}, abstract = {In silico genome mining provides easy access to secondary metabolite biosynthetic gene clusters (BGCs) encoding the biosynthesis of many bioactive compounds, which are the basis for many important drugs used in human medicine. However, the association between BGCs and other functions encoded in the genomes of producers have remained elusive. Here, we present a systems biology workflow that integrates genome mining with a detailed pangenome analysis for detecting genes associated with a particular BGC. We analyzed 3,889 enterobacterial genomes and found 13,266 BGCs, represented by 252 distinct BGC families and 347 additional singletons. A pangenome analysis revealed 88 genes putatively associated with a specific BGC coding for the colon cancer-related colibactin that code for diverse metabolic and regulatory functions. 
The presented workflow opens up the possibility to discover novel secondary metabolites, better understand their physiological roles, and provides a guide to identify and analyze BGC associated gene sets.}, } @article {pmid35644986, year = {2022}, author = {Bayer, PE and Petereit, J and Durant, É and Monat, C and Rouard, M and Hu, H and Chapman, B and Li, C and Cheng, S and Batley, J and Edwards, D}, title = {Wheat Panache: A pangenome graph database representing presence-absence variation across sixteen bread wheat genomes.}, journal = {The plant genome}, volume = {15}, number = {3}, pages = {e20221}, doi = {10.1002/tpg2.20221}, pmid = {35644986}, issn = {1940-3372}, mesh = {*Bread ; Genome, Plant ; Plant Breeding ; Sequence Analysis, DNA ; *Triticum/genetics ; }, abstract = {Bread wheat (Triticum aestivum L.) is one of humanity's most important staple crops, characterized by a large and complex genome with a high level of gene presence-absence variation (PAV) between cultivars, hampering genomic approaches for crop improvement. With the growing global population and the increasing impact of climate change on crop yield, there is an urgent need to apply genomic approaches to accelerate wheat breeding. With recent advances in DNA sequencing technology, a growing number of high-quality reference genomes are becoming available, reflecting the genetic content of a diverse range of cultivars. However, information on the presence or absence of genomic regions has been hard to visualize and interrogate because of the size of these genomes and the lack of suitable bioinformatics tools. To address this limitation, we have produced a wheat pangenome graph maintained within an online database to facilitate interrogation and comparison of wheat cultivar genomes. 
The database allows users to visualize regions of the pangenome to assess PAV between bread wheat genomes.}, } @article {pmid35641504, year = {2022}, author = {Leonard, AS and Crysnanto, D and Fang, ZH and Heaton, MP and Vander Ley, BL and Herrera, C and Bollwein, H and Bickhart, DM and Kuhn, KL and Smith, TPL and Rosen, BD and Pausch, H}, title = {Structural variant-based pangenome construction has low sensitivity to variability of haplotype-resolved bovine assemblies.}, journal = {Nature communications}, volume = {13}, number = {1}, pages = {3012}, pmid = {35641504}, issn = {2041-1723}, mesh = {Animals ; Cattle ; Diploidy ; *Genome/genetics ; Haplotypes ; *High-Throughput Nucleotide Sequencing ; Sequence Analysis, DNA ; }, abstract = {Advantages of pangenomes over linear reference assemblies for genome research have recently been established. However, potential effects of sequence platform and assembly approach, or of combining assemblies created by different approaches, on pangenome construction have not been investigated. Here we generate haplotype-resolved assemblies from the offspring of three bovine trios representing increasing levels of heterozygosity that each demonstrate a substantial improvement in contiguity, completeness, and accuracy over the current Bos taurus reference genome. Diploid coverage as low as 20x for HiFi or 60x for ONT is sufficient to produce two haplotype-resolved assemblies meeting standards set by the Vertebrate Genomes Project. Structural variant-based pangenomes created from the haplotype-resolved assemblies demonstrate significant consensus regardless of sequence platform, assembler algorithm, or coverage. 
Inspecting pangenome topologies identifies 90 thousand structural variants including 931 overlapping with coding sequences; this approach reveals variants affecting QRICH2, PRDM9, HSPA1A, TAS2R46, and GC that have potential to affect phenotype.}, } @article {pmid35639788, year = {2022}, author = {Garrison, E and Kronenberg, ZN and Dawson, ET and Pedersen, BS and Prins, P}, title = {A spectrum of free software tools for processing the VCF variant call format: vcflib, bio-vcf, cyvcf2, hts-nim and slivar.}, journal = {PLoS computational biology}, volume = {18}, number = {5}, pages = {e1009123}, pmid = {35639788}, issn = {1553-7358}, support = {R01 GM123489/GM/NIGMS NIH HHS/United States ; }, mesh = {Computational Biology ; *Ecosystem ; *Genetic Variation/genetics ; Nucleotides ; Software ; }, abstract = {Since its introduction in 2011 the variant call format (VCF) has been widely adopted for processing DNA and RNA variants in practically all population studies-as well as in somatic and germline mutation studies. The VCF format can represent single nucleotide variants, multi-nucleotide variants, insertions and deletions, and simple structural variants called and anchored against a reference genome. Here we present a spectrum of over 125 useful, complimentary free and open source software tools and libraries, we wrote and made available through the multiple vcflib, bio-vcf, cyvcf2, hts-nim and slivar projects. These tools are applied for comparison, filtering, normalisation, smoothing and annotation of VCF, as well as output of statistics, visualisation, and transformations of files variants. These tools run everyday in critical biomedical pipelines and countless shell scripts. Our tools are part of the wider bioinformatics ecosystem and we highlight best practices. 
We shortly discuss the design of VCF, lessons learnt, and how we can address more complex variation through pangenome graph formats, variation that can not easily be represented by the VCF format.}, } @article {pmid35639001, year = {2022}, author = {Koide, S and Nagano, Y and Takizawa, S and Sakaguchi, K and Soga, E and Hayashi, W and Tanabe, M and Denda, T and Kimura, K and Arakawa, Y and Nagano, N}, title = {Genomic Traits Associated with Virulence and Antimicrobial Resistance of Invasive Group B Streptococcus Isolates with Reduced Penicillin Susceptibility from Elderly Adults.}, journal = {Microbiology spectrum}, volume = {10}, number = {3}, pages = {e0056822}, pmid = {35639001}, issn = {2165-0497}, mesh = {Adult ; Aged ; Anti-Bacterial Agents/pharmacology ; *Bacteremia ; Drug Resistance, Bacterial/genetics ; Genomics ; Humans ; Infant, Newborn ; Microbial Sensitivity Tests ; Penicillins/pharmacology ; Phylogeny ; *Streptococcal Infections/epidemiology ; Streptococcus agalactiae/genetics ; Virulence/genetics ; }, abstract = {This study aimed to investigate genomic traits underlying the antimicrobial resistance and virulence of multidrug-resistant (MDR) group B streptococci with reduced penicillin susceptibility (PRGBS) recovered from elderly patients with bloodstream infections, which remain poorly characterized. The pangenome was found to be open, with the predicted pan- and core genome sizes being 3,531 and 1,694 genes, respectively. Accessory and unique genes were enriched for the Clusters of Orthologous Groups (COG) categories L, Replication, recombination, and repair, and K, Transcription. All MDR PRGBS isolates retained a core virulence gene repertoire (bibA, fbsA/-B/-C, cspA, cfb, hylB, scpB, lmb, and the cyl operon), supporting an invasive ability similar to that of the other invasive GBS, penicillin-susceptible GBS (PSGBS), and noninvasive PRGBS isolates. 
The putative sequence type 1 (ST1)-specific AlpST-1 virulence gene was also retained among the serotype Ia/ST1 PRGBS isolates. In addition to tet(M) and erm(B), mef(A)-msr(D) elements or the high-level gentamicin resistance gene aac(6')-aph(2″), which are both rare in PSGBS, were detected among those MDR PRGBS isolates. In the core single-nucleotide polymorphism (SNP) phylogenetic tree, all invasive ST1 PRGBS isolates with serotypes Ia and III were placed together in a clade with a recombination rate of 3.97, which was 36 times higher than the value found for a clade formed by serotype V/ST1 PSGBS isolates derived mostly from human blood. ST1 has been the predominant sequence type among the PRGBS isolates in Japan, and serotypes Ia and III have been very rare among the ST1 PSGBS isolates. Thus, these lineages that mostly consisted of serotypes Ia/ST1 and III/ST1 PRGBS could possibly emerge through recombination within the ST1 populations. IMPORTANCE Streptococcus agalactiae, or group B Streptococcus (GBS), is recognized as the leading cause of neonatal invasive infections. However, an increasing incidence of invasive GBS infections among nonpregnant adults, particularly the elderly and those with underlying diseases, has been observed. There is a trend toward the increasing occurrence of penicillin nonsusceptibility among GBS clinical isolates, from 4.8% in 2008 to 5.8% in 2020 in Japan. Also, in the United States, the frequency of adult invasive GBS isolates suggestive of β-lactam nonsusceptibility increased from 0.7% in 2015 to 1.0% in 2016. In adults, mortality has been significantly higher among patients with bacteremia than among those without bacteremia. 
Our study revealed that invasive GBS with reduced penicillin susceptibility (PRGBS) isolates harbor major virulence and resistance genes known among GBS, highlighting the need for large population-based genomic surveillance studies to better understand the clinical relevance of invasive PRGBS isolates.}, } @article {pmid35638828, year = {2022}, author = {Shambhu, S and Cella, E and Jubair, M and Azarian, T}, title = {Complete Genome Sequences of Nine Streptococcus pneumoniae Serotype 3 Clonal Complex 180 Strains.}, journal = {Microbiology resource announcements}, volume = {11}, number = {7}, pages = {e0027522}, pmid = {35638828}, issn = {2576-098X}, abstract = {We announce the complete genomes of nine Streptococcus pneumoniae strains belonging to serotype 3 clonal complex 180 (CC180). The genomes consist of a single circularized contig with an average length of 2.033 Mbp. Pangenome analysis identified 1,762 core genes and 412 accessory genes. These genomes are the basis for future population genomic studies.}, } @article {pmid35632394, year = {2022}, author = {Attar, R and Alatawi, EA and Aba Alkhayl, FF and Alharbi, KN and Allemailem, KS and Almatroudi, A}, title = {Immunoinformatics and Biophysics Approaches to Design a Novel Multi-Epitopes Vaccine Design against Staphylococcus auricularis.}, journal = {Vaccines}, volume = {10}, number = {5}, pages = {}, pmid = {35632394}, issn = {2076-393X}, abstract = {Due to the misuse of antibiotics in our daily lives, antimicrobial resistance (AMR) has become a major health problem. Penicillin, the first antibiotic, was used in the 1930s and led to the emergence of AMR. Due to alterations in the microbe's genome and the evolution of new resistance mechanisms, antibiotics are losing efficacy against microbes. There are high rates of mortality and morbidity due to antibiotic resistance, so addressing this major health issue requires new approaches. 
Staphylococcus auricularis is a Gram-positive cocci and is capable of causing opportunistic infections and sepsis. S. auricularis is resistant to several antibiotics and does not currently have a licensed vaccine. In this study, we used bacterial pan-genome analysis (BPGA) to study S. auricularis pan-genome and applied a reverse immunology approach to prioritize vaccine targets against S. auricularis. A total of 15,444 core proteins were identified by BPGA analysis, which were then used to identify good vaccine candidates considering potential vaccine filters. Two vaccine candidates were evaluated for epitope prediction including the superoxide dismutase and gamma-glutamyl transferase protein. The epitope prediction phase involved the prediction of a variety of B-Cell and T-cell epitopes, and the epitopes that met certain criteria, such as antigenicity, immunogenicity, non-allergenicity, and non-toxicity were chosen. A multi-epitopes vaccine construct was then constructed from all the predicted epitopes, and a cholera toxin B-subunit adjuvant was also added to increase vaccine antigenicity. Three-dimensional models of the vaccine were used for downward analyses. Using the best-modeled structure, binding potency was tested with MHC-I, MHC-II and TLR-4 immune cells receptors, proving that the vaccine binds strongly with the receptors. Further, molecular dynamics simulations interpreted strong intermolecular binding between the vaccine and receptors and confirmed the vaccine epitopes exposed to the host immune system. The results support that the vaccine candidate may be capable of eliciting a protective immune response against S. 
auricularis and may be a promising candidate for experimental in vitro and in vivo studies.}, } @article {pmid35630466, year = {2022}, author = {Pédron, J and van der Wolf, JM and Portier, P and Caullireau, E and Van Gijsegem, F}, title = {The Broad Host Range Plant Pathogen Dickeya dianthicola Shows a High Genetic Diversity.}, journal = {Microorganisms}, volume = {10}, number = {5}, pages = {}, pmid = {35630466}, issn = {2076-2607}, support = {SPREE (ANR-17-CE32-0004-04)//Agence Nationale de la Recherche/ ; }, abstract = {The wide host range phytopathogen D. dianthicola, first described in ornamentals in the 1950s, rapidly became a threat for potato production in Europe and, more recently, worldwide. Previous genomic analyses, mainly of strains isolated from potato, revealed little sequence diversity. To further analyse D. dianthicola genomic diversity, we used a larger genome panel of 41 isolates encompassing more strains isolated from potato over a wide time scale and more strains isolated from other hosts. The phylogenetic and pan-genomic trees revealed a large cluster of highly related genomes but also the divergence of two more distant strains, IPO 256 and 67.19, isolated from potato and impatiens, respectively, and the clustering of the three strains isolated from Kalanchoe with one more distinct potato strain. An SNP-based minimal spanning tree highlighted both diverse clusters of (nearly) clonal strains and several strains scattered in the MST, irrespective of country or date of isolation, that differ by several thousand SNPs. This study reveals a higher diversity in D. dianthicola than previously described. It indicates the clonal spread of this pathogen over long distances, as suspected from worldwide seed trading, and possible multiple introductions of D. 
dianthicola from alternative sources of contaminations.}, } @article {pmid35630423, year = {2022}, author = {Hwang, CY and Cho, ES and Yoon, DJ and Cha, IT and Jung, DH and Nam, YD and Park, SL and Lim, SI and Seo, MJ}, title = {Genomic and Physiological Characterization of Metabacillus flavus sp. nov., a Novel Carotenoid-Producing Bacilli Isolated from Korean Marine Mud.}, journal = {Microorganisms}, volume = {10}, number = {5}, pages = {}, pmid = {35630423}, issn = {2076-2607}, support = {Collaborate Research Program//Korea Food Research Institute (KFRI) and the Korean Institute of Geoscience and Mineral Resources (KIGAM)/ ; Research Assistance Program (2020)//Incheon National University/ ; }, abstract = {The newly isolated strain KIGAM252[T] was found to be facultatively anaerobic, Gram-stain-positive, spore-forming, and rod-shaped. They grew at 10-45 °C, pH 6.0-10.0, and were able to tolerate up to 6% NaCl in the growth medium. Phylogenetic analysis indicated that the KIGAM252[T] strain was related to the genus Metabacillus. The cell membrane fatty acid composition of strain KIGAM252[T] included C15:0 anteiso and C15:0 iso (25.6%) as the major fatty acids, and menaquinone 7 was the predominant isoprenoid quinone. The major polar lipids were diphosphatidylglycerol and phosphatidylglycerol. The size of the whole genome was 4.30 Mbp, and the G + C content of the DNA was 43.8%. Average nucleotide and amino acid identity and in silico DNA-DNA hybridization values were below the species delineation threshold. Pan-genomic analysis revealed that 15.8% of all genes present in strain KIGAM252[T] was unique to the strain. The analysis of the secondary biosynthetic pathway predicted the carotenoid synthetic gene cluster in the strain KIGAM252[T]. Based on these current polyphasic taxonomic data, strain KIGAM252[T] represents a novel species of the genus Metabacillus that produces carotenoids, for which we propose the name Metabacillus flavus sp. nov. 
The type of strain was KIGAM252[T] (=KCTC 43261[T] = JCM 34406[T]).}, } @article {pmid35630358, year = {2022}, author = {Uceda-Campos, G and Feitosa-Junior, OR and Santiago, CRN and Pierry, PM and Zaini, PA and de Santana, WO and Martins-Junior, J and Barbosa, D and Digiampietri, LA and Setubal, JC and da Silva, AM}, title = {Comparative Genomics of Xylella fastidiosa Explores Candidate Host-Specificity Determinants and Expands the Known Repertoire of Mobile Genetic Elements and Immunity Systems.}, journal = {Microorganisms}, volume = {10}, number = {5}, pages = {}, pmid = {35630358}, issn = {2076-2607}, support = {08/11703-4//São Paulo Research Foundation/ ; 3385/2013//Coordenação de Aperfeicoamento de Pessoal de Nível Superior/ ; 11/09409-3//São Paulo Research Foundation/ ; 09/13527-1//São Paulo Research Foundation/ ; 11/01217-8//São Paulo Research Foundation/ ; }, abstract = {Xylella fastidiosa causes diseases in many plant species. Originally confined to the Americas, infecting mainly grapevine, citrus, and coffee, X. fastidiosa has spread to several plant species in Europe causing devastating diseases. Many pathogenicity and virulence factors have been identified, which enable the various X. fastidiosa strains to successfully colonize the xylem tissue and cause disease in specific plant hosts, but the mechanisms by which this happens have not been fully elucidated. Here we present thorough comparative analyses of 94 whole-genome sequences of X. fastidiosa strains from diverse plant hosts and geographic regions. Core-genome phylogeny revealed clades with members sharing mostly a geographic region rather than a host plant of origin. Phylogenetic trees for 1605 orthologous CDSs were explored for potential candidates related to host specificity using a score of mapping metrics. However, no candidate host-specificity determinants were strongly supported using this approach. We also show that X. 
fastidiosa accessory genome is represented by an abundant and heterogeneous mobilome, including a diversity of prophage regions. Our findings provide a better understanding of the diversity of phylogenetically close genomes and expand the knowledge of X. fastidiosa mobile genetic elements and immunity systems.}, } @article {pmid35630311, year = {2022}, author = {Carter, MQ and Laniohan, N and Lo, CC and Chain, PSG}, title = {Comparative Genomics Applied to Systematically Assess Pathogenicity Potential in Shiga Toxin-Producing Escherichia coli O145:H28.}, journal = {Microorganisms}, volume = {10}, number = {5}, pages = {}, pmid = {35630311}, issn = {2076-2607}, support = {5325-42000-052-00D//Agricultural Research Service/ ; }, abstract = {Shiga toxin-producing Escherichia coli (STEC) O145:H28 can cause severe disease in humans and is a predominant serotype in STEC O145 environmental isolates. Here, comparative genomics was applied to a set of clinical and environmental strains to systematically evaluate the pathogenicity potential in environmental strains. While the core genes-based tree separated all O145:H28 strains from the non O145:H28 reference strains, it failed to segregate environmental strains from the clinical. In contrast, the accessory genes-based tree placed all clinical strains in the same clade regardless of their genotypes or serotypes, apart from the environmental strains. Loss-of-function mutations were common in the virulence genes examined, with a high frequency in genes related to adherence, autotransporters, and the type three secretion system. Distinct differences in pathogenicity islands LEE, OI-122, and OI-57, the acid fitness island, and the tellurite resistance island were detected between the O145:H28 and reference strains. A great amount of genetic variation was detected in O145:H28, which was mainly attributed to deletions, insertions, and gene acquisition at several chromosomal "hot spots". 
Our study demonstrated a distinct virulence gene repertoire among the STEC O145:H28 strains originating from the same geographical region and revealed unforeseen contributions of loss-of-function mutations to virulence evolution and genetic diversification in STEC.}, } @article {pmid35628779, year = {2022}, author = {Liebal, UW and Ullmann, L and Lieven, C and Kohl, P and Wibberg, D and Zambanini, T and Blank, LM}, title = {Ustilago maydis Metabolic Characterization and Growth Quantification with a Genome-Scale Metabolic Model.}, journal = {Journal of fungi (Basel, Switzerland)}, volume = {8}, number = {5}, pages = {}, pmid = {35628779}, issn = {2309-608X}, support = {FSC-2186//Deutsche Forschungsgemeinschaft/ ; }, abstract = {Ustilago maydis is an important plant pathogen that causes corn smut disease and serves as an effective biotechnological production host. The lack of a comprehensive metabolic overview hinders a full understanding of the organism's environmental adaptation and a full use of its metabolic potential. Here, we report the first genome-scale metabolic model (GSMM) of Ustilago maydis (iUma22) for the simulation of metabolic activities. iUma22 was reconstructed from sequencing and annotation using PathwayTools, and the biomass equation was derived from literature values and from the codon composition. The final model contains over 25% annotated genes (6909) in the sequenced genome. Substrate utilization was corrected by BIOLOG phenotype arrays, and exponential batch cultivations were used to test growth predictions. The growth data revealed a decrease in glucose uptake rate with rising glucose concentration. A pangenome of four different U. maydis strains highlighted missing metabolic pathways in iUma22. 
The new model allows for studies of metabolic adaptations to different environmental niches as well as for biotechnological applications.}, } @article {pmid35628419, year = {2022}, author = {Edwards, S and León-Zayas, R and Ditter, R and Laster, H and Sheehan, G and Anderson, O and Beattie, T and Mellies, JL}, title = {Microbial Consortia and Mixed Plastic Waste: Pangenomic Analysis Reveals Potential for Degradation of Multiple Plastic Types via Previously Identified PET Degrading Bacteria.}, journal = {International journal of molecular sciences}, volume = {23}, number = {10}, pages = {}, pmid = {35628419}, issn = {1422-0067}, support = {1931150//National Science Foundation/ ; }, mesh = {Bacteria/genetics/metabolism ; *Microbial Consortia ; Plasticizers ; Plastics/metabolism ; *Polyethylene Terephthalates ; }, abstract = {The global utilization of single-use, non-biodegradable plastics, such as bottles made of polyethylene terephthalate (PET), has contributed to catastrophic levels of plastic pollution. Fortunately, microbial communities are adapting to assimilate plastic waste. Previously, our work showed a full consortium of five bacteria capable of synergistically degrading PET. Using omics approaches, we identified the key genes implicated in PET degradation within the consortium's pangenome and transcriptome. This analysis led to the discovery of a novel PETase, EstB, which has been observed to hydrolyze the oligomer BHET and the polymer PET. Besides the genes implicated in PET degradation, many other biodegradation genes were discovered. Over 200 plastic and plasticizer degradation-related genes were discovered through the Plastic Microbial Biodegradation Database (PMBD). Diverse carbon source utilization was observed by a microbial community-based assay, which, paired with an abundant number of plastic- and plasticizer-degrading enzymes, indicates a promising possibility for mixed plastic degradation. 
Using RNAseq differential analysis, several genes were predicted to be involved in PET degradation, including aldehyde dehydrogenases and several classes of hydrolases. Active transcription of PET monomer metabolism was also observed, including the generation of polyhydroxyalkanoate (PHA)/polyhydroxybutyrate (PHB) biopolymers. These results present an exciting opportunity for the bio-recycling of mixed plastic waste with upcycling potential.}, } @article {pmid35627219, year = {2022}, author = {Wu, XT and Xiong, ZP and Chen, KX and Zhao, GR and Feng, KR and Li, XH and Li, XR and Tian, Z and Huo, FL and Wang, MX and Song, W}, title = {Genome-Wide Identification and Transcriptional Expression Profiles of PP2C in the Barley (Hordeum vulgare L.) Pan-Genome.}, journal = {Genes}, volume = {13}, number = {5}, pages = {}, pmid = {35627219}, issn = {2073-4425}, mesh = {Domestication ; Genes, Plant ; Genome, Plant ; *Hordeum/enzymology/genetics ; *Multigene Family ; Phylogeny ; *Protein Phosphatase 2C/genetics ; }, abstract = {The gene family protein phosphatase 2C (PP2C) is related to developmental processes and stress responses in plants. Barley (Hordeum vulgare L.) is a popular cereal crop that is primarily utilized for human consumption and nutrition. However, there is little knowledge regarding the PP2C gene family in barley. In this study, a total of 1635 PP2C genes were identified in 20 barley pan-genome accessions. Then, chromosome localization, physical and chemical feature predictions and subcellular localization were systematically analyzed. One wild barley accession (B1K-04-12) and one cultivated barley (Morex) were chosen as representatives to further analyze and compare the differences in HvPP2Cs between wild and cultivated barley. Phylogenetic analysis showed that these HvPP2Cs were divided into 12 subgroups. 
Additionally, gene structure, conserved domain and motif, gene duplication event detection, interaction networks and gene expression profiles were analyzed in accessions Morex and B1K-04-12. In addition, qRT-PCR experiments in Morex indicated that seven HvMorexPP2C genes were involved in the response to aluminum and low pH stresses. Finally, a series of positively selected homologous genes were identified between wild accession B1K-04-12 and another 14 cultivated materials, indicating that these genes are important during barley domestication. This work provides a global overview of the putative physiological and biological functions of PP2C genes in barley. We provide a broad framework for understanding the domestication- and evolutionary-induced changes in PP2C genes between wild and cultivated barley.}, } @article {pmid35625323, year = {2022}, author = {Khan, K and Basharat, Z and Jalal, K and Mashraqi, MM and Alzamami, A and Alshamrani, S and Uddin, R}, title = {Identification of Therapeutic Targets in an Emerging Gastrointestinal Pathogen Campylobacter ureolyticus and Possible Intervention through Natural Products.}, journal = {Antibiotics (Basel, Switzerland)}, volume = {11}, number = {5}, pages = {}, pmid = {35625323}, issn = {2079-6382}, abstract = {Campylobacter ureolyticus is a Gram-negative, anaerobic, non-spore-forming bacteria that causes gastrointestinal infections. Being the most prevalent cause of bacterial enteritis globally, infection by this bacterium is linked with significant morbidity and mortality in children and immunocompromised patients. No information on pan-therapeutic drug targets for this species is available yet. In the current study, a pan-genome analysis was performed on 13 strains of C. ureolyticus to prioritize potent drug targets from the identified core genome. In total, 26 druggable proteins were identified using subtractive genomics. 
To the best of the authors' knowledge, this is the first report on the mining of drug targets in C. ureolyticus. UDP-3-O-acyl-N-acetylglucosamine deacetylase (LpxC) was selected as a promiscuous pharmacological target for virtual screening of two bacterial-derived natural product libraries, i.e., postbiotics (n = 78) and streptomycin (n = 737) compounds. LpxC inhibitors from the ZINC database (n = 142 compounds) were also studied with reference to LpxC of C. ureolyticus. The top three docked compounds from each library (including ZINC26844580, ZINC13474902, ZINC13474878, Notoginsenoside St-4, Asiaticoside F, Paraherquamide E, Phytoene, Lycopene, and Sparsomycin) were selected based on their binding energies and validated using molecular dynamics simulations. To help identify potential risks associated with the selected compounds, ADMET profiling was also performed and most of the compounds were considered safe. Our findings may serve as baseline information for laboratory studies leading to the discovery of drugs for use against C. 
ureolyticus infections.}, } @article {pmid35616118, year = {2022}, author = {Yao, E and Blake, VC and Cooper, L and Wight, CP and Michel, S and Cagirici, HB and Lazo, GR and Birkett, CL and Waring, DJ and Jannink, JL and Holmes, I and Waters, AJ and Eickholt, DP and Sen, TZ}, title = {GrainGenes: a data-rich repository for small grains genetics and genomics.}, journal = {Database : the journal of biological databases and curation}, volume = {2022}, number = {}, pages = {}, pmid = {35616118}, issn = {1758-0463}, mesh = {Avena/genetics ; Chromosome Mapping ; Databases, Genetic ; *Genome, Plant/genetics ; Genomics ; *Hordeum/genetics ; Quantitative Trait Loci ; Triticum/genetics ; }, abstract = {As one of the US Department of Agriculture-Agricultural Research Service flagship databases, GrainGenes (https://wheat.pw.usda.gov) serves the data and community needs of globally distributed small grains researchers for the genetic improvement of the Triticeae family and Avena species that include wheat, barley, rye and oat. GrainGenes accomplishes its mission by continually enriching its cross-linked data content following the findable, accessible, interoperable and reusable principles, enhancing and maintaining an intuitive web interface, creating tools to enable easy data access and establishing data connections within and between GrainGenes and other biological databases to facilitate knowledge discovery. GrainGenes operates within the biological database community, collaborates with curators and genome sequencing groups and contributes to the AgBioData Consortium and the International Wheat Initiative through the Wheat Information System (WheatIS). Interactive and linked content is paramount for successful biological databases and GrainGenes now has 2917 manually curated gene records, including 289 genes and 254 alleles from the Wheat Gene Catalogue (WGC). 
There are >4.8 million gene models in 51 genome browser assemblies, 6273 quantitative trait loci and >1.4 million genetic loci on 4756 genetic and physical maps contained within 443 mapping sets, complete with standardized metadata. Most notably, 50 new genome browsers that include outputs from the Wheat and Barley PanGenome projects have been created. We provide an example of an expression quantitative trait loci track on the International Wheat Genome Sequencing Consortium Chinese Spring wheat browser to demonstrate how genome browser tracks can be adapted for different data types. To help users benefit more from its data, GrainGenes created four tutorials available on YouTube. GrainGenes is executing its vision of service by continuously responding to the needs of the global small grains community by creating a centralized, long-term, interconnected data repository. Database URL:https://wheat.pw.usda.gov.}, } @article {pmid35615513, year = {2022}, author = {Neuzil-Bunesova, V and Ramirez Garcia, A and Modrackova, N and Makovska, M and Sabolova, M and Spröer, C and Bunk, B and Blom, J and Schwab, C}, title = {Feed Insects as a Reservoir of Granadaene-Producing Lactococci.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {848490}, pmid = {35615513}, issn = {1664-302X}, abstract = {Insects are a component of the diet of different animal species and have been suggested as the major source of human dietary protein for the future. However, insects are also carriers of potentially pathogenic microbes that constitute a risk to food and feed safety. In this study, we reported the occurrence of a hemolytic orange pigmented producing phenotype of Lactococcus garvieae/petauri/formosensis in the fecal microbiota of golden lion tamarins (Leontopithecus rosalia) and feed larvae (Zophobas atratus). Feed insects were identified as a regular source of L. garvieae/petauri/formosensis based on a reanalysis of available 16S rRNA gene libraries. 
Pan-genome analysis suggested the existence of four clusters within the L. garvieae/petauri/formosensis group. The presence of cyl cluster indicated that some strains of the L. garvieae/petauri/formosensis group produced a pigment similar to granadaene, an orange cytotoxic lipid produced by group B streptococci, including Streptococcus agalactiae. Pigment production by L. garvieae/petauri/formosensis strains was dependent on the presence of the fermentable sugars, with no pigment being observed at pH <4.7. The addition of buffering compounds or arginine, which can be metabolized to ammonium, restored pigment formation. In addition, pigment formation might be related to the source of peptone. These data suggest that edible insects are a possible source of granadaene-producing lactococci, which can be considered a pathogenic risk with zoonotic potential.}, } @article {pmid35612623, year = {2022}, author = {Bach, E and Rangel, CP and Ribeiro, IDA and Passaglia, LMP}, title = {Pangenome analyses of Bacillus pumilus, Bacillus safensis, and Priestia megaterium exploring the plant-associated features of bacilli strains isolated from canola.}, journal = {Molecular genetics and genomics : MGG}, volume = {297}, number = {4}, pages = {1063--1079}, pmid = {35612623}, issn = {1617-4623}, support = {155771/2018-3//Conselho Nacional de Desenvolvimento Científico e Tecnológico/ ; }, mesh = {*Bacillus/genetics ; *Bacillus pumilus/genetics ; DNA ; Phylogeny ; }, abstract = {Previous genome mining of the strains Bacillus pumilus 7PB, Bacillus safensis 1TAz, 8Taz, and 32PB, and Priestia megaterium 16PB isolated from canola revealed differences in the profile of antimicrobial biosynthetic genes when compared to the species type strains. To evaluate not only the similarities among B. pumilus, B. safensis, and P. megaterium genomes but also the specificities found in the canola bacilli, we performed comparative genomic analyses through the pangenome evaluation of each species. 
Besides that, other genome features were explored, especially focusing on plant-associated and biotechnological characteristics. The combination of the genome metrics Average Nucleotide Identity and digital DNA-DNA hybridization formulas 1 and 3 adopting the universal thresholds of 95 and 70%, respectively, was suitable to verify the identification of strains from these groups. On average, core genes corresponded to 45%, 52%, and 34% of B. pumilus, B. safensis, and P. megaterium open pangenomes, respectively. Many genes related to adaptations to plant-associated lifestyles were predicted, especially in the Bacillus genomes. These included genes for acetoin production, polyamines utilization, root exudate chemoreceptors, biofilm formation, and plant cell-wall degrading enzymes. Overall, we could observe that strains of these species exhibit many features in common, whereas most of their variable genome portions have features yet to be uncovered. The observed antifungal activity of canola bacilli might be a result of the synergistic action of secondary metabolites, siderophores, and chitinases. Genome analysis confirmed that these species and strains have biotechnological potential to be used both as agricultural inoculants or hydrolases producers. Up to our knowledge, this is the first work that evaluates the pangenome features of P. 
megaterium.}, } @article {pmid35604683, year = {2022}, author = {Saldarriaga-Córdoba, M and Avendaño-Herrera, R}, title = {Comparative pan-genomic analysis of 51 Renibacterium salmoninarum indicates heterogeneity in the principal virulence factor, the 57 kDa protein.}, journal = {Journal of fish diseases}, volume = {45}, number = {8}, pages = {1173--1188}, doi = {10.1111/jfd.13653}, pmid = {35604683}, issn = {1365-2761}, support = {grant FONDAP 15110027//Agencia Nacional de Investigación y Desarrollo (ANID, Chile)/ ; }, mesh = {Animals ; *Fish Diseases/microbiology ; Genomics ; *Kidney Diseases/microbiology ; *Micrococcaceae/genetics ; Phylogeny ; Renibacterium ; Salmon ; Virulence Factors/genetics/metabolism ; }, abstract = {Renibacterium salmoninarum, a Gram-positive intracellular pathogen, is the causative agent of bacterial kidney disease (BKD), the impacts of which are high mortalities and economic losses for the salmon industry. This study provides novel analyses for the whole-genome sequences of 50 R. salmoninarum isolates and the reference strain ATCC 33209 using a pan-genomic approach to elucidate phylogenomic relationships and identify unique and shared genes associated with pathogenicity and infection mechanisms. Genome size varied from 3,061,638 to 3,155,332 bp; gene count from 3452 to 3580; and predicted coding sequences from 3402 to 3527. Comparative analyses revealed an open, but approaching closed, pan-genome. The pan-genome analysis recovered 4064 genes, with a core genome containing 3306 genes. Phylogenetic analysis of R. salmoninarum showed high genomic homogeneity, apart from one isolate obtained from Salmo trutta in Norway. All genomes presented the 57-kDa protein (p57). Strain ATCC 33209 and the Chilean isolates H-2 and DJ2R presented two copies of the msa gene, while the remaining isolates had one copy. 
The pan-genome analysis further identified differences in the number of copies and length of the signalling peptide for p57, the principal virulence factor reported for this bacterium. This heterogeneity could be associated with the secretion levels of p57, potentially influencing virulence. Additionally identified were numerous common genes related to iron uptake, the stress response and regulation, and cell signalling—all of which constitute the pathogenic repertoire of R. salmoninarum. This investigation provides information that is applicable in future studies for identifying therapeutic targets and/or for designing new strategies (e.g., vaccines) to prevent BKD infections in salmon farming.}, } @article {pmid35604129, year = {2022}, author = {de Korne-Elenbaas, J and Bruisten, SM and van Dam, AP and Maiden, MCJ and Harrison, OB}, title = {The Neisseria gonorrhoeae Accessory Genome and Its Association with the Core Genome and Antimicrobial Resistance.}, journal = {Microbiology spectrum}, volume = {10}, number = {3}, pages = {e0265421}, pmid = {35604129}, issn = {2165-0497}, support = {/WT_/Wellcome Trust/United Kingdom ; 218205/Z/19/Z/WT_/Wellcome Trust/United Kingdom ; 214374/Z/18/Z/WT_/Wellcome Trust/United Kingdom ; }, mesh = {Anti-Bacterial Agents/pharmacology ; Drug Resistance, Bacterial/genetics ; *Gonorrhea ; Humans ; Microbial Sensitivity Tests ; *Neisseria gonorrhoeae/genetics ; }, abstract = {The bacterial accessory genome provides the genetic flexibility needed to facilitate environment and host adaptation. In Neisseria gonorrhoeae, known accessory elements include plasmids which can transfer and mediate antimicrobial resistance (AMR); however, chromosomal accessory genes could also play a role in AMR. Here, the gonococcal accessory genome was characterized using gene-by-gene approaches and its association with the core genome and AMR were assessed. 
The gonococcal accessory gene pool consisted of 247 genes, which were mainly genes located on large mobile genetic elements, phage associated genes, or genes encoding putative secretion systems. Accessory elements showed similar synteny across genomes, indicating either a predisposition for particular genomic locations or ancestral inheritance that are conserved during strain expansion. Significant associations were found between the prevalence of accessory elements and core genome multi-locus sequence types (cgMLST), consistent with a structured gonococcal population despite frequent horizontal gene transfer (HGT). Increased prevalence of putative DNA exchange regulators was significantly associated with AMR, which included a putative secretion system, methyltransferases and a toxin-antitoxin system. Although frequent HGT results in high genetic diversity in the gonococcus, we found that this is mediated by a small gene pool. In fact, a highly organized genome composition was identified with a strong association between the accessory and core genome. Increased prevalence of DNA exchange regulators in antimicrobial resistant isolates suggests that genetic material exchange plays a role in the development or maintenance of AMR. These findings enhance our understanding of gonococcal genome architecture and have important implications for gonococcal population biology. IMPORTANCE The emergence of antimicrobial resistance (AMR) against third generation cephalosporins in Neisseria gonorrhoeae is a major public health concern, as these are antibiotics of last resort for the effective treatment of gonorrhea. Although the resistance mechanisms against this class of antibiotics have not been entirely resolved, resistance against other classes of antibiotics, such as tetracyclines, is known to be mediated through plasmids, which are known gonococcal extra-chromosomal accessory elements. 
A complete assessment of the chromosomal accessory genome content and its role in AMR has not yet been undertaken. Here, we comprehensively characterize the gonococcal accessory genome to better understand genome architecture as well as the evolution and mechanisms of AMR in this species.}, } @article {pmid35602063, year = {2022}, author = {Wang, C and Ye, Q and Jiang, A and Zhang, J and Shang, Y and Li, F and Zhou, B and Xiang, X and Gu, Q and Pang, R and Ding, Y and Wu, S and Chen, M and Wu, Q and Wang, J}, title = {Pseudomonas aeruginosa Detection Using Conventional PCR and Quantitative Real-Time PCR Based on Species-Specific Novel Gene Targets Identified by Pangenome Analysis.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {820431}, pmid = {35602063}, issn = {1664-302X}, abstract = {Mining novel specific molecular targets and establishing efficient identification methods are significant for detecting Pseudomonas aeruginosa, which can enable P. aeruginosa tracing in food and water. Pangenome analysis was used to analyze the whole genomic sequences of 2017 strains (including 1,000 P. aeruginosa strains and 1,017 other common foodborne pathogen strains) downloaded from gene databases to obtain novel species-specific genes, yielding a total of 11 such genes. Four novel target genes, UCBPP-PA14_00095, UCBPP-PA14_03237, UCBPP-PA14_04976, and UCBPP-PA14_03627, were selected for use, which had 100% coverage in the target strain and were not present in nontarget bacteria. PCR primers (PA1, PA2, PA3, and PA4) and qPCR primers (PA12, PA13, PA14, and PA15) were designed based on these target genes to establish detection methods. For the PCR primer set, the minimum detection limit for DNA was 65.4 fg/μl, which was observed for primer set PA2 of the UCBPP-PA14_03237 gene. 
The detection limit in pure culture without pre-enrichment was 10[5] colony-forming units (CFU)/ml for primer set PA1, 10[3] CFU/ml for primer set PA2, and 10[4] CFU/ml for primer set PA3 and primer set PA4. Then, qPCR standard curves were established based on the novel species-specific targets. The standard curves showed perfect linear correlations, with R [2] values of 0.9901 for primer set PA12, 0.9915 for primer set PA13, 0.9924 for primer set PA14, and 0.9935 for primer set PA15. The minimum detection limit of the real-time PCR (qPCR) assay was 10[2] CFU/ml for pure cultures of P. aeruginosa. Compared with the endpoint PCR and traditional culture methods, the qPCR assay was more sensitive by one or two orders of magnitude. The feasibility of these methods was satisfactory in terms of sensitivity, specificity, and efficiency after evaluating 29 ready-to-eat vegetable samples and was almost consistent with that of the national standard detection method. The developed assays can be applied for rapid screening and detection of pathogenic P. aeruginosa, providing accurate results to inform effective monitoring measures in order to improve microbiological safety.}, } @article {pmid35602040, year = {2022}, author = {Geng, R and Cheng, L and Cao, C and Liu, Z and Liu, D and Xiao, Z and Wu, X and Huang, Z and Feng, Q and Luo, C and Chen, Z and Zhang, Z and Jiang, C and Ren, M and Yang, A}, title = {Comprehensive Analysis Reveals the Genetic and Pathogenic Diversity of Ralstonia solanacearum Species Complex and Benefits Its Taxonomic Classification.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {854792}, pmid = {35602040}, issn = {1664-302X}, abstract = {Ralstonia solanacearum species complex (RSSC) is a diverse group of plant pathogens that attack a wide range of hosts and cause devastating losses worldwide. 
In this study, we conducted a comprehensive analysis of 131 RSSC strains to detect their genetic diversity, pathogenicity, and evolution dynamics. Average nucleotide identity analysis was performed to explore the genomic relatedness among these strains, and finally obtained an open pangenome with 32,961 gene families. To better understand the diverse evolution and pathogenicity, we also conducted a series of analyses of virulence factors (VFs) and horizontal gene transfer (HGT) in the pangenome and at the single genome level. The distribution of VFs and mobile genetic elements (MGEs) showed significant differences among different groups and strains, which were consistent with the new nomenclatures of the RSSC with three distinct species. Further functional analysis showed that most HGT events conferred from Burkholderiales and played a great role in shaping the genomic plasticity and genetic diversity of RSSC genomes. Our work provides insights into the genetic polymorphism, evolution dynamics, and pathogenetic variety of RSSC and provides strong supports for the new taxonomic classification, as well as abundant resources for studying host specificity and pathogen emergence.}, } @article {pmid35602010, year = {2022}, author = {Mizzi, R and Plain, KM and Whittington, R and Timms, VJ}, title = {Global Phylogeny of Mycobacterium avium and Identification of Mutation Hotspots During Niche Adaptation.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {892333}, pmid = {35602010}, issn = {1664-302X}, abstract = {Mycobacterium avium is separated into four subspecies: M. avium subspecies avium (MAA), M. avium subspecies silvaticum (MAS), M. avium subspecies hominissuis (MAH), and M. avium subspecies paratuberculosis (MAP). Understanding the mechanisms of host and tissue adaptation leading to their clinical significance is vital to reduce the economic, welfare, and public health concerns associated with diseases they may cause in humans and animals. 
Despite substantial phenotypic diversity, the subspecies nomenclature is controversial due to high genetic similarity. Consequently, a set of 1,230 M. avium genomes was used to generate a phylogeny, investigate SNP hotspots, and identify subspecies-specific genes. Phylogeny reiterated the findings from previous work and established that Mycobacterium avium is a species made up of one highly diverse subspecies, known as MAH, and at least two clonal pathogens, named MAA and MAP. Pan-genomes identified coding sequences unique to each subspecies, and in conjunction with a mapping approach, mutation hotspot regions were revealed compared to the reference genomes for MAA, MAH, and MAP. These subspecies-specific genes may serve as valuable biomarkers, providing a deeper understanding of genetic differences between M. avium subspecies and the virulence mechanisms of mycobacteria. Furthermore, SNP analysis demonstrated common regions between subspecies that have undergone extensive mutations during niche adaptation. 
The findings provide insights into host and tissue specificity of this genetically conserved but phenotypically diverse species, with the potential to provide new diagnostic targets and epidemiological and therapeutic advances.}, } @article {pmid35590072, year = {2022}, author = {Verdez, S and Thomas, Q and Garret, P and Verstuyft, C and Tisserant, E and Vitobello, A and Mau-Them, FT and Philippe, C and Bardou, M and Luu, M and Bourredjem, A and Callier, P and Thauvin-Robinet, C and Picard, N and Faivre, L and Duffourd, Y}, title = {Exome sequencing allows detection of relevant pharmacogenetic variants in epileptic patients.}, journal = {The pharmacogenomics journal}, volume = {22}, number = {5-6}, pages = {258-263}, pmid = {35590072}, issn = {1473-1150}, mesh = {Humans ; *Pharmacogenomic Variants ; Phenytoin ; Exome/genetics ; Retrospective Studies ; *Epilepsy/diagnosis/drug therapy/genetics ; }, abstract = {Beyond the identification of causal genetic variants in the diagnosis of Mendelian disorders, exome sequencing can detect numerous variants with potential relevance for clinical care. Clinical interventions can thus be conducted to improve future health outcomes for patients and their at-risk relatives, such as predicting late-onset genetic disorders accessible to prevention, treatment or identifying differential drug efficacy and safety. To evaluate the interest of such pharmacogenetic information, we designed an "in house" pipeline to determine the status of 122 PharmGKB (Pharmacogenomics Knowledgebase) variant-drug combinations in 31 genes. This pipeline was applied to a cohort of 90 epileptic patients who had previously an exome sequencing (ES) analysis, to determine the frequency of pharmacogenetic variants. We performed a retrospective analysis of drug plasma concentrations and treatment efficacy in patients bearing at least one relevant PharmGKB variant. 
For PharmGKB level 1A variants, CYP2C9 status for phenytoin prescription was the only relevant information. Nineteen patients were treated with phenytoin; among phenytoin-treated patients, none were poor metabolizers and four were intermediate metabolizers. While being treated with a standard protocol (10-23 mg/kg/30 min loading dose followed by 5 mg/kg/8 h maintenance dose), all identified intermediate metabolizers had toxic plasma concentrations (20 mg/L). In epileptic patients, pangenomic sequencing can provide information about common pharmacogenetic variants likely to be useful to guide therapeutic drug monitoring, and in the case of phenytoin, to prevent clinical toxicity caused by high plasma levels.}, } @article {pmid35588244, year = {2022}, author = {Gluck-Thaler, E and Ralston, T and Konkel, Z and Ocampos, CG and Ganeshan, VD and Dorrance, AE and Niblack, TL and Wood, CW and Slot, JC and Lopez-Nicora, HD and Vogan, AA}, title = {Giant Starship Elements Mobilize Accessory Genes in Fungal Genomes.}, journal = {Molecular biology and evolution}, volume = {39}, number = {5}, pages = {}, pmid = {35588244}, issn = {1537-1719}, mesh = {DNA Transposable Elements ; Eukaryotic Cells ; *Genome, Fungal ; Humans ; *Virulence Factors ; }, abstract = {Accessory genes are variably present among members of a species and are a reservoir of adaptive functions. In bacteria, differences in gene distributions among individuals largely result from mobile elements that acquire and disperse accessory genes as cargo. In contrast, the impact of cargo-carrying elements on eukaryotic evolution remains largely unknown. Here, we show that variation in genome content within multiple fungal species is facilitated by Starships, a newly discovered group of massive mobile elements that are 110 kb long on average, share conserved components, and carry diverse arrays of accessory genes. 
We identified hundreds of Starship-like regions across every major class of filamentous Ascomycetes, including 28 distinct Starships that range from 27 to 393 kb and last shared a common ancestor ca. 400 Ma. Using new long-read assemblies of the plant pathogen Macrophomina phaseolina, we characterize four additional Starships whose activities contribute to standing variation in genome structure and content. One of these elements, Voyager, inserts into 5S rDNA and contains a candidate virulence factor whose increasing copy number has contrasting associations with pathogenic and saprophytic growth, suggesting Voyager's activity underlies an ecological trade-off. We propose that Starships are eukaryotic analogs of bacterial integrative and conjugative elements based on parallels between their conserved components and may therefore represent the first dedicated agents of active gene transfer in eukaryotes. Our results suggest that Starships have shaped the content and structure of fungal genomes for millions of years and reveal a new concerted route for evolution throughout an entire eukaryotic phylum.}, } @article {pmid35585492, year = {2022}, author = {Ghimire, N and Kim, B and Lee, CM and Oh, TJ}, title = {Comparative genome analysis among Variovorax species and genome guided aromatic compound degradation analysis emphasizing 4-hydroxybenzoate degradation in Variovorax sp. PAMC26660.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {375}, pmid = {35585492}, issn = {1471-2164}, support = {PM21030//the Ministry of Oceans and Fisheries, Korea/ ; }, mesh = {Carbon ; *Parabens ; Phylogeny ; *Xenobiotics ; }, abstract = {BACKGROUND: While the genus Variovorax is known for its aromatic compound metabolism, no detailed study of the peripheral and central pathways of aromatic compound degradation has yet been reported. Variovorax sp. PAMC26660 is a lichen-associated bacterium isolated from Antarctica. 
The work presents the genome-based elucidation of peripheral and central catabolic pathways of aromatic compound degradation genes in Variovorax sp. PAMC26660. Additionally, the accessory, core and unique genes were identified among Variovorax species using the pan genome analysis tool. A detailed analysis of the genes related to xenobiotic metabolism revealed the potential roles of Variovorax sp. PAMC26660 and other species in bioremediation.

RESULTS: TYGS analysis, dDDH, phylogenetic placement and average nucleotide identity (ANI) analysis identified the strain as Variovorax sp. Cell morphology was assessed using scanning electron microscopy (SEM). On analysis of the core, accessory, and unique genes, xenobiotic metabolism accounted only for the accessory and unique genes. On detailed analysis of the aromatic compound catabolic genes, peripheral pathway related to 4-hydroxybenzoate (4-HB) degradation was found among all species while phenylacetate and tyrosine degradation pathways were present in most of the species including PAMC26660. Likewise, central catabolic pathways, like protocatechuate, gentisate, homogentisate, and phenylacetyl-CoA, were also present. The peripheral pathway for 4-HB degradation was functionally tested using PAMC26660, which resulted in the growth using it as a sole source of carbon.

CONCLUSIONS: Computational tools for genome and pan genome analysis are important to understand the behavior of an organism. Xenobiotic metabolism-related genes, that only account for the accessory and unique genes infer evolution through events like lateral gene transfer, mutation and gene rearrangement. 4-HB, an aromatic compound present among lichen species is utilized by lichen-associated Variovorax sp. PAMC26660 as the sole source of carbon. The strain holds genes and pathways for its utilization. Overall, this study outlines the importance of Variovorax in bioremediation and presents the genomic information of the species.}, } @article {pmid35579358, year = {2022}, author = {Nanni, AV and Morse, AM and Newman, JRB and Choquette, NE and Wedow, JM and Liu, Z and Leakey, ADB and Conesa, A and Ainsworth, EA and McIntyre, LM}, title = {Variation in leaf transcriptome responses to elevated ozone corresponds with physiological sensitivity to ozone across maize inbred lines.}, journal = {Genetics}, volume = {221}, number = {4}, pages = {}, pmid = {35579358}, issn = {1943-2631}, support = {R01 GM128193/GM/NIGMS NIH HHS/United States ; R03 CA222444/CA/NCI NIH HHS/United States ; }, mesh = {Gene Expression Regulation, Plant ; Genotype ; *Ozone/metabolism/toxicity ; Plant Leaves/genetics/metabolism ; Transcriptome ; *Zea mays/genetics/metabolism ; }, abstract = {We examine the impact of sustained elevated ozone concentration on the leaf transcriptome of 5 diverse maize inbred genotypes, which vary in physiological sensitivity to ozone (B73, Mo17, Hp301, C123, and NC338), using long reads to assemble transcripts and short reads to quantify expression of these transcripts. More than 99% of the long reads, 99% of the assembled transcripts, and 97% of the short reads map to both B73 and Mo17 reference genomes. 
Approximately 95% of the genes with assembled transcripts belong to known B73-Mo17 syntenic loci and 94% of genes with assembled transcripts are present in all temperate lines in the nested association mapping pan-genome. While there is limited evidence for alternative splicing in response to ozone stress, there is a difference in the magnitude of differential expression among the 5 genotypes. The transcriptional response to sustained ozone stress in the ozone resistant B73 genotype (151 genes) was modest, while more than 3,300 genes were significantly differentially expressed in the more sensitive NC338 genotype. There is the potential for tandem duplication in 30% of genes with assembled transcripts, but there is no obvious association between potential tandem duplication and differential expression. Genes with a common response across the 5 genotypes (83 genes) were associated with photosynthesis, in particular photosystem I. The functional annotation of genes not differentially expressed in B73 but responsive in the other 4 genotypes (789) identifies reactive oxygen species. This suggests that B73 has a different response to long-term ozone exposure than the other 4 genotypes. The relative magnitude of the genotypic response to ozone, and the enrichment analyses are consistent regardless of whether aligning short reads to: long read assembled transcripts; the B73 reference; the Mo17 reference. 
We find that prolonged ozone exposure directly impacts the photosynthetic machinery of the leaf.}, } @article {pmid35578144, year = {2022}, author = {Alvarez, F and Simonetti, E and Draghi, WO and Vinacour, M and Palumbo, MC and Do Porto, DF and Montecchia, MS and Roberts, IN and Ruiz, JA}, title = {Genome mining of Burkholderia ambifaria strain T16, a rhizobacterium able to produce antimicrobial compounds and degrade the mycotoxin fusaric acid.}, journal = {World journal of microbiology & biotechnology}, volume = {38}, number = {7}, pages = {114}, pmid = {35578144}, issn = {1573-0972}, support = {PICT2017-3193//Agencia Nacional de Promoción Científica y Tecnológica/ ; PICT2017-1500//Agencia Nacional de Promoción Científica y Tecnológica/ ; PICT2017-1500//Agencia Nacional de Promoción Científica y Tecnológica/ ; PICT2017-1500//Agencia Nacional de Promoción Científica y Tecnológica/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; UBACyT 20020130200117BA//Secretaria de Ciencia y Tecnica, Universidad de Buenos Aires/ ; PUE 0136//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; PUE 0136//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; PUE 0136//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; PUE 0136//Consejo Nacional de Investigaciones Científicas y Técnicas/ ; }, mesh = {*Anti-Infective Agents/metabolism ; *Burkholderia/metabolism ; *Burkholderia cepacia complex/genetics ; Fusaric Acid/metabolism ; Genome, Bacterial ; *Mycotoxins/metabolism ; }, abstract = {Burkholderia ambifaria T16 is a bacterium isolated from the rhizosphere of barley plants that showed a remarkable antifungal activity. 
This strain was also able to degrade fusaric acid (5-Butylpyridine-2-carboxylic acid) and detoxify this mycotoxin in inoculated barley seedlings. Genes and enzymes responsible for fusaric acid degradation have an important biotechnological potential in the control of fungal diseases caused by fusaric acid producers, or in the biodegradation/bio catalysis processes of pyridine derivatives. In this study, the complete genome of B. ambifaria T16 was sequenced and analyzed to identify genes involved in survival and competition in the rhizosphere, plant growth promotion, fungal growth inhibition, and degradation of aromatic compounds. The genomic analysis revealed the presence of several operons for the biosynthesis of antimicrobial compounds, such as pyrrolnitrin, ornibactin, occidiofungin and the membrane-associated AFC-BC11. These compounds were also detected in bacterial culture supernatants by mass spectrometry analysis. In addition, this strain has multiple genes contributing to its plant growth-promoting profile, including those for acetoin, 2,3-butanediol and indole-3-acetic acid production, siderophores biosynthesis, and solubilisation of organic and inorganic phosphate. A pan-genomic analysis demonstrated that the genome of strain T16 possesses large gene clusters that are absent in the genomes of B. ambifaria reference strains. According to predictions, most of these clusters would be involved in aromatic compounds degradation. 
One genomic region, encoding flavin-dependent monooxygenases of unknown function, is proposed as a candidate responsible for fusaric acid degradation.}, } @article {pmid35575437, year = {2022}, author = {Abdullah, IT and Ulijasz, AT and Girija, UV and Tam, S and Andrew, P and Hiller, NL and Wallis, R and Yesilkaya, H}, title = {Structure-function analysis for the development of peptide inhibitors for a Gram-positive quorum sensing system.}, journal = {Molecular microbiology}, volume = {117}, number = {6}, pages = {1464-1478}, pmid = {35575437}, issn = {1365-2958}, support = {R01 AI135060/AI/NIAID NIH HHS/United States ; R01 AI139077/AI/NIAID NIH HHS/United States ; }, mesh = {Bacterial Proteins/metabolism ; *Gene Expression Regulation, Bacterial ; Peptides/metabolism ; *Quorum Sensing/genetics ; Streptococcus pneumoniae/metabolism ; }, abstract = {The Streptococcus pneumoniae Rgg144/SHP144 regulator-peptide quorum sensing (QS) system is critical for nutrient utilization, oxidative stress response, and virulence. Here, we characterized this system by assessing the importance of each residue within the active short hydrophobic peptide (SHP) by alanine-scanning mutagenesis and testing the resulting peptides for receptor binding and activation of the receptor. Interestingly, several of the mutations had little effect on binding to Rgg144 but reduced transcriptional activation appreciably. In particular, a proline substitution (P21A) reduced transcriptional activation by 29-fold but bound with a 3-fold higher affinity than the wild-type SHP. Consistent with the function of Rgg144, the mutant peptide led to decreased utilization of mannose and increased susceptibility to superoxide generator paraquat. Pangenome comparison showed full conservation of P21 across SHP144 allelic variants. Crystallization of Rgg144 in the absence of peptide revealed a comparable structure to the DNA bound and free forms of its homologs suggesting similar mechanisms of activation. 
Together, these analyses identify key interactions in a critical pneumococcal QS system. Further manipulation of the SHP has the potential to facilitate the development of inhibitors that are functional across strains. The approach described here is likely to be effective across QS systems in multiple species.}, } @article {pmid35573770, year = {2022}, author = {Chen, H and Li, Y and Xie, X and Chen, M and Xue, L and Wang, J and Ye, Q and Wu, S and Yang, R and Zhao, H and Zhang, J and Ding, Y and Wu, Q}, title = {Exploration of the Molecular Mechanisms Underlying the Anti-Photoaging Effect of Limosilactobacillus fermentum XJC60.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {838060}, pmid = {35573770}, issn = {2235-2988}, mesh = {Animals ; Antioxidants/metabolism ; Guinea Pigs ; Reactive Oxygen Species/metabolism ; Skin ; *Skin Aging ; Ultraviolet Rays ; }, abstract = {Although lactic acid bacteria (LAB) were shown to be effective for preventing photoaging, the underlying molecular mechanisms have not been fully elucidated. Accordingly, we examined the anti-photoaging potential of 206 LAB isolates and discovered 32 strains with protective activities against UV-induced injury. All of these 32 LABs exhibited high levels of 2,2-diphenyl-picrylhydrazyl, as well as hydroxyl free radical scavenging ability (46.89-85.13% and 44.29-95.97%, respectively). Genome mining and metabonomic verification of the most effective strain, Limosilactobacillus fermentum XJC60, revealed that the anti-photoaging metabolite of LAB was nicotinamide (NAM; 18.50 mg/L in the cell-free serum of XJC60). Further analysis revealed that LAB-derived NAM could reduce reactive oxygen species levels by 70%, stabilize the mitochondrial membrane potential, and increase the NAD[+]/NADH ratio in UV-injured skin cells. 
Furthermore, LAB-derived NAM downregulated the transcript levels of matrix metalloproteinase (MMP)-1, MMP-3, interleukin (IL)-1β, IL-6, and IL-8 in skin cells. In vivo, XJC60 relieved inflammation and protected skin collagen fiber integrity in UV-injured Guinea pigs. Overall, our findings elucidate that LAB-derived NAM might protect skin from photoaging by stabilizing mitochondrial function, establishing a theoretical foundation for the use of probiotics in the maintenance of skin health.}, } @article {pmid35567182, year = {2022}, author = {Petereit, J and Marsh, JI and Bayer, PE and Danilevicz, MF and Thomas, WJW and Batley, J and Edwards, D}, title = {Genetic and Genomic Resources for Soybean Breeding Research.}, journal = {Plants (Basel, Switzerland)}, volume = {11}, number = {9}, pages = {}, pmid = {35567182}, issn = {2223-7747}, support = {DP210100296; DP200100762, and DE210100398//Australian Research Council/ ; 9177539 and 9177591//Grains Research and Development Corporation/ ; }, abstract = {Soybean (Glycine max) is a legume species of significant economic and nutritional value. The yield of soybean continues to increase with the breeding of improved varieties, and this is likely to continue with the application of advanced genetic and genomic approaches for breeding. Genome technologies continue to advance rapidly, with an increasing number of high-quality genome assemblies becoming available. With accumulating data from marker arrays and whole-genome resequencing, studying variations between individuals and populations is becoming increasingly accessible. Furthermore, the recent development of soybean pangenomes has highlighted the significant structural variation between individuals, together with knowledge of what has been selected for or lost during domestication and breeding, information that can be applied for the breeding of improved cultivars. 
Because of this, resources such as genome assemblies, SNP datasets, pangenomes and associated databases are becoming increasingly important for research underlying soybean crop improvement.}, } @article {pmid35563985, year = {2022}, author = {Yi, Z and Xie, J}, title = {Genomic Analysis of Two Representative Strains of Shewanella putrefaciens Isolated from Bigeye Tuna: Biofilm and Spoilage-Associated Behavior.}, journal = {Foods (Basel, Switzerland)}, volume = {11}, number = {9}, pages = {}, pmid = {35563985}, issn = {2304-8158}, support = {31972142//National Natural Science Foundation of China/ ; 19DZ1207503//key project of Science and Technology Commission of Shanghai Municipality/ ; CARS-47//China Agriculture Research System of MOF and MARA/ ; 19DZ2284000//Shanghai Municipal Science and Technology Project to enhance the capabilities of the platform/ ; }, abstract = {Shewanella putrefaciens can cause the spoilage of seafood and shorten its shelf life. In this study, both strains of S. putrefaciens (YZ08 and YZ-J) isolated from spoiled bigeye tuna were subjected to in-depth phenotypic and genotypic characterization to better understand their roles in seafood spoilage. The complete genome sequences of strains YZ08 and YZ-J were reported. Unique genes of the two S. putrefaciens strains were identified by pan-genomic analysis. In vitro experiments revealed that YZ08 and YZ-J could adapt to various environmental stresses, including cold-shock temperature, pH, NaCl, and nutrient stresses. YZ08 was better at adapting to NaCl stress, and its genome possessed more NaCl stress-related genes compared with the YZ-J strain. YZ-J was a higher biofilm and exopolysaccharide producer than YZ08 at 4 and 30 °C, while YZ08 showed greater motility and enhanced capacity for biogenic amine metabolism, trimethylamine metabolism, and sulfur metabolism compared with YZ-J at both temperatures. 
That YZ08 produced low biofilm and exopolysaccharide contents and displayed high motility may be associated with the presence of a greater number of genes encoding chemotaxis-related proteins (cheX) and low expression of the bpfA operon. This study provided novel molecular targets for the development of new antisepsis strategies.}, } @article {pmid35562911, year = {2022}, author = {Du, Y and Jin, Y and Li, B and Yue, J and Yin, Z}, title = {Comparative Genomic Analysis of Vibrio cincinnatiensis Provides Insights into Genetic Diversity, Evolutionary Dynamics, and Pathogenic Traits of the Species.}, journal = {International journal of molecular sciences}, volume = {23}, number = {9}, pages = {}, pmid = {35562911}, issn = {1422-0067}, support = {ZR2021QC208//Shandong Provincial Natural Science Foundation/ ; 010/721000//Scientific Research Foundation of Shandong Agricultural University/ ; 2018ZX10101-003-001-008//the National Science and Technology Major Project of Infectious Diseases/ ; }, mesh = {Gene Transfer, Horizontal ; Genetic Variation ; *Genome, Bacterial ; Genomics/methods ; Phylogeny ; *Vibrio/genetics ; }, abstract = {Vibrio cincinnatiensis is a poorly understood pathogenic Vibrio species, and the underlying mechanisms of its genetic diversity, genomic plasticity, evolutionary dynamics, and pathogenicity have not yet been comprehensively investigated. Here, a comparative genomic analysis of V. cincinnatiensis was constructed. The open pan-genome with a flexible gene repertoire exhibited genetic diversity. The genomic plasticity and stability were characterized by the determinations of diverse mobile genetic elements (MGEs) and barriers to horizontal gene transfer (HGT), respectively. Evolutionary divergences were exhibited by the difference in functional enrichment and selective pressure between the different components of the pan-genome. The evolution on the Chr I and Chr II core genomes was mainly driven by purifying selection. 
Predicted essential genes in V. cincinnatiensis were mainly found in the core gene families on Chr I and were subject to stronger evolutionary constraints. We identified diverse virulence-related elements, including the gene clusters involved in encoding flagella, secretion systems, several pili, and scattered virulence genes. Our results indicated the pathogenic potential of V. cincinnatiensis and highlighted that HGT events from other Vibrio species promoted pathogenicity. This pan-genome study provides comprehensive insights into this poorly understood species from the genomic perspective.}, } @article {pmid35560205, year = {2022}, author = {Song, JM and Zhang, Y and Zhou, ZW and Lu, S and Ma, W and Lu, C and Chen, LL and Guo, L}, title = {Oil plant genomes: current state of the science.}, journal = {Journal of experimental botany}, volume = {73}, number = {9}, pages = {2859-2874}, doi = {10.1093/jxb/erab472}, pmid = {35560205}, issn = {1460-2431}, support = {31871658//National Natural Science Foundation of China/ ; 2019CFA014//Hubei Provincial Natural Science Foundation of China/ ; 2016YFD0101000//National Key Research and Development Plan of China/ ; B20051//Higher Education Discipline Innovation Project/ ; }, mesh = {Crops, Agricultural/genetics ; *Genome, Plant ; Genomics ; *Polyploidy ; }, abstract = {Vegetable oils are an indispensable nutritional component of the human diet as well as important raw materials for a variety of industrial applications such as pharmaceuticals, cosmetics, oleochemicals, and biofuels. Oil plant genomes are highly diverse, and their genetic variation leads to a diversity in oil biosynthesis and accumulation along with agronomic traits. This review discusses plant oil biosynthetic pathways, current state of genome assembly, polyploidy and asymmetric evolution of genomes of oil plants and their wild relatives, and research progress of pan-genomics in oil plants. 
The availability of complete high-resolution genomes and pan-genomes has enabled the identification of structural variations in the genomes that are associated with the diversity of agronomic and environment fitness traits. These and future genomes also provide powerful tools to understand crop evolution and to harvest the rich natural variations to improve oil crops for enhanced productivity, oil quality, and adaptability to changing environments.}, } @article {pmid35557713, year = {2022}, author = {Zhou, J and Hu, M and Hu, A and Li, C and Ren, X and Tao, M and Xue, Y and Chen, S and Tang, C and Xu, Y and Zhang, L and Zhou, X}, title = {Isolation and Genome Analysis of Pectobacterium colocasium sp. nov. and Pectobacterium aroidearum, Two New Pathogens of Taro.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {852750}, pmid = {35557713}, issn = {1664-462X}, abstract = {Bacterial soft rot is one of the most destructive diseases of taro (Colocasia esculenta) worldwide. In recent years, frequent outbreaks of soft rot disease have seriously affected taro production and became a major constraint to the development of taro planting in China. However, little is known about the causal agents of this disease, and the only reported pathogens are two Dickeya species and P. carotovorum. In this study, we report taro soft rot caused by two novel Pectobacterium strains, LJ1 and LJ2, isolated from taro corms in Ruyuan County, Shaoguan City, Guangdong Province, China. We showed that LJ1 and LJ2 fulfill Koch's postulates for taro soft rot. The two pathogens can infect taro both individually and simultaneously, and neither synergistic nor antagonistic interaction was observed between the two pathogens. Genome sequencing of the two strains indicated that LJ1 represents a novel species of the genus Pectobacterium, for which the name "Pectobacterium colocasium sp. nov." is proposed, while LJ2 belongs to Pectobacterium aroidearum. 
Pan-genome analysis revealed multiple pathogenicity-related differences between LJ1, LJ2, and other Pectobacterium species, including unique virulence factors, variation in the copy number and organization of Type III, IV, and VI secretion systems, and differential production of plant cell wall degrading enzymes. This study identifies two new soft rot Pectobacteriaceae (SRP) pathogens causing taro soft rot in China, reports a new case of co-infection of plant pathogens, and provides valuable resources for further investigation of the pathogenic mechanisms of SRP.}, } @article {pmid35552372, year = {2022}, author = {Guarracino, A and Heumos, S and Nahnsen, S and Prins, P and Garrison, E}, title = {ODGI: understanding pangenome graphs.}, journal = {Bioinformatics (Oxford, England)}, volume = {38}, number = {13}, pages = {3319-3326}, pmid = {35552372}, issn = {1367-4811}, support = {R01 GM123489/GM/NIGMS NIH HHS/United States ; U01 DA047638/DA/NIDA NIH HHS/United States ; #2118709//NSF PPoSS/ ; //Federal Ministry for Economic Affairs and Energy of Germany/ ; //BMBF/ ; 031A537B//German Network for Bioinformatics Infrastructure/ ; }, mesh = {*Software ; *Genome ; Genomics ; Algorithms ; Documentation ; }, abstract = {MOTIVATION: Pangenome graphs provide a complete representation of the mutual alignment of collections of genomes. These models offer the opportunity to study the entire genomic diversity of a population, including structurally complex regions. Nevertheless, analyzing hundreds of gigabase-scale genomes using pangenome graphs is difficult as it is not well-supported by existing tools. Hence, fast and versatile software is required to ask advanced questions to such data in an efficient way.

RESULTS: We wrote Optimized Dynamic Genome/Graph Implementation (ODGI), a novel suite of tools that implements scalable algorithms and has an efficient in-memory representation of DNA pangenome graphs in the form of variation graphs. ODGI supports pre-built graphs in the Graphical Fragment Assembly format. ODGI includes tools for detecting complex regions, extracting pangenomic loci, removing artifacts, exploratory analysis, manipulation, validation and visualization. Its fast parallel execution facilitates routine pangenomic tasks, as well as pipelines that can quickly answer complex biological questions of gigabase-scale pangenome graphs.

AVAILABILITY AND IMPLEMENTATION: ODGI is published as free software under the MIT open source license. Source code can be downloaded from https://github.com/pangenome/odgi and documentation is available at https://odgi.readthedocs.io. ODGI can be installed via Bioconda https://bioconda.github.io/recipes/odgi/README.html or GNU Guix https://github.com/pangenome/odgi/blob/master/guix.scm.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.}, } @article {pmid35551692, year = {2022}, author = {Cella, E and Sutcliffe, CG and Tso, C and Paul, E and Ritchie, N and Colelay, J and Denny, E and Grant, LR and Weatherholtz, RC and Hammitt, LL and Azarian, T}, title = {Carriage prevalence and genomic epidemiology of Staphylococcus aureus among Native American children and adults in the Southwestern USA.}, journal = {Microbial genomics}, volume = {8}, number = {5}, pages = {}, pmid = {35551692}, issn = {2057-5858}, support = {K22 AI141582/AI/NIAID NIH HHS/United States ; }, mesh = {Adult ; Child ; Cross-Sectional Studies ; Genomics ; Humans ; Prevalence ; *Staphylococcal Infections/epidemiology/microbiology ; *Staphylococcus aureus ; American Indian or Alaska Native ; }, abstract = {Native American individuals in the Southwestern USA experience a higher burden of invasive Staphylococcus aureus disease than the general population. However, little is known about S. aureus carriage in these communities. A cross-sectional study was conducted to determine the carriage prevalence, risk factors and genomic epidemiology of S. aureus among Native American children (<5 years, n=121) and adults (≥18 years, n=167) in the Southwestern USA. Short- and long-read sequencing data were generated using Illumina and Oxford Nanopore Technology platforms to produce high-quality hybrid assemblies, and antibiotic-resistance, virulence and pangenome analyses were performed. S. aureus carriage prevalence was 20.7 % among children, 30.2 % among adults 18-64 years and 16.7 % among adults ≥65 years. Risk factors among adults included recent surgery, prior S. aureus infection among household members, and recent use of gyms or locker rooms by household members. No risk factors were identified among children. The bacterial population structure was dominated by clonal complex 1 (CC1) (21.1 %), CC5 (22.2 %) and CC8 (22.2 %). 
Isolates from children and adults were intermixed throughout the phylogeny. While the S. aureus population was diverse, the carriage prevalence was comparable to that in the general USA population. Genomic and risk-factor data suggest household, community and healthcare transmission are important components of the local epidemiology.}, } @article {pmid35550024, year = {2022}, author = {Mesa, V and Monot, M and Ferraris, L and Popoff, M and Mazuet, C and Barbut, F and Delannoy, J and Dupuy, B and Butel, MJ and Aires, J}, title = {Core-, pan- and accessory genome analyses of Clostridium neonatale: insights into genetic diversity.}, journal = {Microbial genomics}, volume = {8}, number = {5}, pages = {}, pmid = {35550024}, issn = {2057-5858}, mesh = {*Clostridium/genetics ; Genetic Variation ; *Genome, Bacterial ; Humans ; Infant, Newborn ; Phylogeny ; }, abstract = {Clostridium neonatale is a potential opportunistic pathogen recovered from faecal samples in cases of necrotizing enterocolitis (NEC), a gastrointestinal disease affecting preterm neonates. Although the C. neonatale species description and name validation were published in 2018, comparative genomics are lacking. In the present study, we provide the closed genome assembly of the C. neonatale ATCC BAA-265[T] (=250.09) reference strain with a manually curated functional annotation of the coding sequences. Pan-, core- and accessory genome analyses were performed using the complete 250.09 genome (4.7 Mb), three new assemblies (4.6-5.6 Mb), and five publicly available draft genome assemblies (4.6-4.7 Mb). The C. neonatale pan-genome contains 6840 genes, while the core-genome has 3387 genes. Pan-genome analysis revealed an 'open' state and genomic diversity. The strain-specific gene families ranged from five to 742 genes. 
Multiple mobile genetic elements were predicted, including a total of 201 genomic islands, 13 insertion sequence families, one CRISPR-Cas type I-B system and 15 predicted intact prophage signatures. Primary virulence classes including offensive, defensive, regulation of virulence-associated genes and non-specific virulence factors were identified. The presence of a tet(W/N/W) gene encoding a tetracycline resistance ribosomal protection protein and a 23S rRNA methyltransferase ermQ gene were identified in two different strains. Together, our results revealed a genetic diversity and plasticity of C. neonatale genomes and provide a comprehensive view of this species genomic features, paving the way for the characterization of its biological capabilities.}, } @article {pmid35529944, year = {2022}, author = {Zhou, X and Liu, Z}, title = {Unlocking plant metabolic diversity: A (pan)-genomic view.}, journal = {Plant communications}, volume = {3}, number = {2}, pages = {100300}, pmid = {35529944}, issn = {2590-3462}, mesh = {*Genome, Plant/genetics ; *Genomics ; Multigene Family ; Plants/genetics ; Repetitive Sequences, Nucleic Acid ; }, abstract = {Plants produce a remarkable diversity of structurally and functionally diverse natural chemicals that serve as adaptive compounds throughout their life cycles. However, unlocking this metabolic diversity is significantly impeded by the size, complexity, and abundant repetitive elements of typical plant genomes. As genome sequencing becomes routine, we anticipate that links between metabolic diversity and genetic variation will be strengthened. In addition, an ever-increasing number of plant genomes have revealed that biosynthetic gene clusters are not only a hallmark of microbes and fungi; gene clusters for various classes of compounds have also been found in plants, and many are associated with important agronomic traits. 
We present recent examples of plant metabolic diversification that have been discovered through the exploration and exploitation of various genomic and pan-genomic data. We also draw attention to the fundamental genomic and pan-genomic basis of plant chemodiversity and discuss challenges and future perspectives for investigating metabolic diversity in the coming pan-genomics era.}, } @article {pmid35510788, year = {2022}, author = {White, H and Vos, M and Sheppard, SK and Pascoe, B and Raymond, B}, title = {Signatures of selection in core and accessory genomes indicate different ecological drivers of diversification among Bacillus cereus clades.}, journal = {Molecular ecology}, volume = {31}, number = {13}, pages = {3584-3597}, pmid = {35510788}, issn = {1365-294X}, support = {BB/M009122/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Bacillus cereus/genetics ; Gene Transfer, Horizontal/genetics ; *Genome, Bacterial/genetics ; Phenotype ; Phylogeny ; }, abstract = {Bacterial clades are often ecologically distinct, despite extensive horizontal gene transfer (HGT). How selection works on different parts of bacterial pan-genomes to drive and maintain the emergence of clades is unclear. Focusing on the three largest clades in the diverse and well-studied Bacillus cereus sensu lato group, we identified clade-specific core genes (present in all clade members) and then used clade-specific allelic diversity to identify genes under purifying and diversifying selection. Clade-specific accessory genes (present in a subset of strains within a clade) were characterized as being under selection using presence/absence in specific clades. Gene ontology analyses of genes under selection revealed that different gene functions were enriched in different clades. Furthermore, some gene functions were enriched only amongst clade-specific core or accessory genomes. 
Genes under purifying selection were often clade-specific, while genes under diversifying selection showed signs of frequent HGT. These patterns are consistent with different selection pressures acting on both the core and the accessory genomes of different clades and can lead to ecological divergence in both cases. Examining variation in allelic diversity allows us to uncover genes under clade-specific selection, allowing ready identification of strains and their ecological niche.}, } @article {pmid35507269, year = {2022}, author = {Tantoso, E and Eisenhaber, B and Eisenhaber, F}, title = {Optimizing the Parametrization of Homologue Classification in the Pan-Genome Computation for a Bacterial Species: Case Study Streptococcus pyogenes.}, journal = {Methods in molecular biology (Clifton, N.J.)}, volume = {2449}, number = {}, pages = {299-324}, pmid = {35507269}, issn = {1940-6029}, mesh = {Cluster Analysis ; *Genome, Bacterial ; Genomics/methods ; Multigene Family ; Phylogeny ; *Streptococcus pyogenes/genetics ; }, abstract = {The paradigm shift associated with the introduction of the pan-genome concept has drawn the attention from singular reference genomes toward the actual sequence diversity within organism populations, strain collections, clades, etc. A single genome is no longer sufficient to describe bacteria of interest, but instead, the genomic repertoire of all existing strains is the key to the metabolic, evolutionary, or pathogenic potential of a species. The classification of orthologous genes derived from a collection of taxonomically related genome sequences is central to bacterial pan-genome computational analysis. In this work, we present a review of methods for computing pan-genome gene clusters including their comparative analysis for the case of Streptococcus pyogenes strain genomes. 
We exhaustively scanned the parametrization space of the homologue searching procedures and find optimal parameters (sequence identity (60%) and coverage (50-60%) in the pairwise alignment) for the orthologous clustering of gene sequences. We find that the sequence identity threshold influences the number of gene families ~3 times stronger than the sequence coverage threshold.}, } @article {pmid35501686, year = {2022}, author = {Liu, H and Zhao, W and Hua, W and Liu, J}, title = {A large-scale population based organelle pan-genomes construction and phylogeny analysis reveal the genetic diversity and the evolutionary origins of chloroplast and mitochondrion in Brassica napus L.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {339}, pmid = {35501686}, issn = {1471-2164}, support = {31871664//National Natural Science Foundation of China/ ; CAAS-ZDRW2019003//Agricultural Science and Technology Innovation Program/ ; }, mesh = {Brassica/genetics ; *Brassica napus/genetics ; Brassica rapa/genetics ; Chloroplasts/genetics ; Genetic Variation ; *Genome, Chloroplast ; *Genome, Mitochondrial ; Genome, Plant ; Mitochondria/genetics ; Phylogeny ; }, abstract = {BACKGROUND: Allotetraploid oilseed rape (Brassica napus L.) is an important worldwide oil-producing crop. The origin of rapeseed is still undetermined due to the lack of wild resources. Despite certain genetic architecture and phylogenetic studies have been done focus on large group of Brassica nuclear genomes, the organelle genomes information under global pattern is largely unknown, which provide unique material for phylogenetic studies of B. napus. Here, based on de novo assemblies of 1,579 B. napus accessions collected globally, we constructed the chloroplast and mitochondrial pan-genomes of B. napus, and investigated the genetic diversity, phylogenetic relationships of B. napus, B. rapa and B. oleracea.

RESULTS: Based on mitotype-specific markers and mitotype-variant ORFs, four main cytoplasmic haplotypes were identified in our groups corresponding to the nap, pol, ole, and cam mitotypes, among which the structure of chloroplast genomes was more conserved without any rearrangement than mitochondrial genomes. A total of 2,092 variants were detected in chloroplast genomes, whereas only 326 in mitochondrial genomes, indicating that chloroplast genomes exhibited a higher level of single-base polymorphism than mitochondrial genomes. Based on whole-genome variants diversity analysis, eleven genetic difference regions among different cytoplasmic haplotypes were identified on chloroplast genomes. The phylogenetic tree incorporating accessions of the B. rapa, B. oleracea, natural and synthetic populations of B. napus revealed multiple origins of B. napus cytoplasm. The cam-type and pol-type were both derived from B. rapa, while the ole-type originated from B. oleracea. Notably, the nap-type cytoplasm was identified in both the B. rapa population and the synthetic B. napus, suggesting that B. rapa might be the maternal ancestor of nap-type B. napus.

CONCLUSIONS: The phylogenetic results provide novel insights into the organelle genomic evolution of Brassica species. The natural rapeseeds contained at least four cytoplastic haplotypes, of which the predominant nap-type might be originated from B. rapa. Besides, the organelle pan-genomes and the overall variation data offered useful resources for analysis of cytoplasmic inheritance related agronomical important traits of rapeseed, which can substantially facilitate the cultivation and improvement of rapeseed varieties.}, } @article {pmid35498663, year = {2022}, author = {Burridge, AJ and Winfield, MO and Wilkinson, PA and Przewieslik-Allen, AM and Edwards, KJ and Barker, GLA}, title = {The Use and Limitations of Exome Capture to Detect Novel Variation in the Hexaploid Wheat Genome.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {841855}, pmid = {35498663}, issn = {1664-462X}, abstract = {The bread wheat (Triticum aestivum) pangenome is a patchwork of variable regions, including translocations and introgressions from progenitors and wild relatives. Although a large number of these have been documented, it is likely that many more remain unknown. To map these variable regions and make them more traceable in breeding programs, wheat accessions need to be genotyped or sequenced. The wheat genome is large and complex and consequently, sequencing efforts are often targeted through exome capture. In this study, we employed exome capture prior to sequencing 12 wheat varieties; 10 elite T. aestivum cultivars and two T. aestivum landrace accessions. Sequence coverage across chromosomes was greater toward distal regions of chromosome arms and lower in centromeric regions, reflecting the capture probe distribution which itself is determined by the known telomere to centromere gene gradient. Superimposed on this general pattern, numerous drops in sequence coverage were observed. Several of these corresponded with reported introgressions. 
Other drops in coverage could not be readily explained and may point to introgressions that have not, to date, been documented.}, } @article {pmid35496987, year = {2022}, author = {Nwaiwu, O}, title = {Comparative genome analysis of the first Listeria monocytogenes core genome multi-locus sequence types CT2050 AND CT2051 strains with their close relatives.}, journal = {AIMS microbiology}, volume = {8}, number = {1}, pages = {61-72}, pmid = {35496987}, issn = {2471-1888}, abstract = {Genome sequences of the three strains of L. monocytogenes, which are the first core genome multi-locus sequence types (cgMLST) 2050 and 2051 were reviewed and compared with 21 close relatives and reference genomes. Using a pan-genomic approach to analyse whole genome sequences, it was found that the strains consisted of approximately 2200 shared genes and a much greater pool of genes present as an accessory genome. An unknown transmissible sequence of approximately 91 kb harbouring bacitracin resistance genes found in strain LmNG2 (1/2b) was revealed to be an Inc18 plasmid. The CT2051, strain LmNG3 (1/2a) harboured more unique genes (252 vs 230) than the well-known reference strain LmEGD-e (1/2a). 
More studies to monitor new strains can help reduce food-borne outbreaks.}, } @article {pmid35493726, year = {2022}, author = {Song, Y and Xu, X and Huang, Z and Xiao, Y and Yu, K and Jiang, M and Yin, S and Zheng, M and Meng, H and Han, Y and Wang, Y and Wang, D and Wei, Q}, title = {Corrigendum: Genomic Characteristics and Pan-Genome Analysis of Rhodococcus equi.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {884441}, doi = {10.3389/fcimb.2022.884441}, pmid = {35493726}, issn = {2235-2988}, abstract = {[This corrects the article DOI: 10.3389/fcimb.2022.807610.].}, } @article {pmid35489163, year = {2022}, author = {Mohd Saad, NS and Neik, TX and Thomas, WJW and Amas, JC and Cantila, AY and Craig, RJ and Edwards, D and Batley, J}, title = {Advancing designer crops for climate resilience through an integrated genomics approach.}, journal = {Current opinion in plant biology}, volume = {67}, number = {}, pages = {102220}, doi = {10.1016/j.pbi.2022.102220}, pmid = {35489163}, issn = {1879-0356}, mesh = {Climate Change ; *Crops, Agricultural/genetics ; Domestication ; Genomics ; *Plant Breeding/methods ; }, abstract = {Climate change and exponential population growth are exposing an immediate need for developing future crops that are highly resilient and adaptable to changing environments to maintain global food security in the next decade. Rigorous selection from long domestication history has rendered cultivated crops genetically disadvantaged, raising concerns in their ability to adapt to these new challenges and limiting their usefulness in breeding programmes. 
As a result, future crop improvement efforts must rely on integrating various genomic strategies ranging from high-throughput sequencing to machine learning, in order to exploit germplasm diversity and overcome bottlenecks created by domestication, expansive multi-dimensional phenotypes, arduous breeding processes, complex traits and big data.}, } @article {pmid35488861, year = {2022}, author = {Wang, Z and Rouard, M and Biswas, MK and Droc, G and Cui, D and Roux, N and Baurens, FC and Ge, XJ and Schwarzacher, T and Heslop-Harrison, PJS and Liu, Q}, title = {A chromosome-level reference genome of Ensete glaucum gives insight into diversity and chromosomal and repetitive sequence evolution in the Musaceae.}, journal = {GigaScience}, volume = {11}, number = {}, pages = {}, pmid = {35488861}, issn = {2047-217X}, mesh = {Chromosomes ; DNA Copy Number Variations ; DNA Transposable Elements ; *Musa/genetics ; *Musaceae/genetics ; Plant Breeding ; Retroelements ; Sequence Analysis, DNA ; }, abstract = {BACKGROUND: Ensete glaucum (2n = 2x = 18) is a giant herbaceous monocotyledonous plant in the small Musaceae family along with banana (Musa). A high-quality reference genome sequence assembly of E. glaucum is a resource for functional and evolutionary studies of Ensete, Musaceae, and the Zingiberales.

FINDINGS: Using Oxford Nanopore Technologies, chromosome conformation capture (Hi-C), Illumina and RNA survey sequence, supported by molecular cytogenetics, we report a high-quality 481.5 Mb genome assembly with 9 pseudo-chromosomes and 36,836 genes. A total of 55% of the genome is composed of repetitive sequences with predominantly LTR-retroelements (37%) and DNA transposons (7%). The single 5S ribosomal DNA locus had an exceptionally long monomer length of 1,056 bp, more than twice that of the monomers at multiple loci in Musa. A tandemly repeated satellite (1.1% of the genome, with no similar sequence in Musa) was present around all centromeres, together with a few copies of a long interspersed nuclear element (LINE) retroelement. The assembly enabled us to characterize in detail the chromosomal rearrangements occurring between E. glaucum and the x = 11 species of Musa. One E. glaucum chromosome has the same gene content as Musa acuminata, while others show multiple, complex, but clearly defined evolutionary rearrangements in the change between x = 9 and 11.

CONCLUSIONS: The advance towards a Musaceae pangenome including E. glaucum, tolerant of extreme environments, makes a complete set of gene alleles, copy number variation, and a reference for structural variation available for crop breeding and understanding environmental responses. The chromosome-scale genome assembly shows the nature of chromosomal fusion and translocation events during speciation, and features of rapid repetitive DNA change in terms of copy number, sequence, and genomic location, critical to understanding its role in diversity and evolution.}, } @article {pmid35483961, year = {2022}, author = {Markello, C and Huang, C and Rodriguez, A and Carroll, A and Chang, PC and Eizenga, J and Markello, T and Haussler, D and Paten, B}, title = {A complete pedigree-based graph workflow for rare candidate variant analysis.}, journal = {Genome research}, volume = {32}, number = {5}, pages = {893-903}, pmid = {35483961}, issn = {1549-5469}, mesh = {*Genome ; High-Throughput Nucleotide Sequencing ; INDEL Mutation ; Pedigree ; *Polymorphism, Single Nucleotide ; Software ; Workflow ; }, abstract = {Methods that use a linear genome reference for genome sequencing data analysis are reference-biased. In the field of clinical genetics for rare diseases, a resulting reduction in genotyping accuracy in some regions has likely prevented the resolution of some cases. Pangenome graphs embed population variation into a reference structure. Although pangenome graphs have helped to reduce reference mapping bias, further performance improvements are possible. We introduce VG-Pedigree, a pedigree-aware workflow based on the pangenome-mapping tool of Giraffe and the variant calling tool DeepTrio using a specially trained model for Giraffe-based alignments. 
We demonstrate mapping and variant calling improvements in both single-nucleotide variants (SNVs) and insertion and deletion (indel) variants over those produced by alignments created using BWA-MEM to a linear-reference and Giraffe mapping to a pangenome graph containing data from the 1000 Genomes Project. We have also adapted and upgraded deleterious-variant (DV) detecting methods and programs into a streamlined workflow. We used these workflows in combination to detect small lists of candidate DVs among 15 family quartets and quintets of the Undiagnosed Diseases Program (UDP). All candidate DVs that were previously diagnosed using the Mendelian models covered by the previously published methods were recapitulated by these workflows. The results of these experiments indicate that a slightly greater absolute count of DVs are detected in the proband population than in their matched unaffected siblings.}, } @article {pmid35483110, year = {2022}, author = {Alotaibi, G and Khan, K and Al Mouslem, AK and Ahmad Khan, S and Naseer Abbas, M and Abbas, M and Ali Shah, S and Jalal, K}, title = {Pan genome based reverse vaccinology approach to explore Enterococcus faecium (VRE) strains for identification of novel multi-epitopes vaccine candidate.}, journal = {Immunobiology}, volume = {227}, number = {3}, pages = {152221}, doi = {10.1016/j.imbio.2022.152221}, pmid = {35483110}, issn = {1878-3279}, mesh = {Aged ; Child ; Computational Biology ; *Enterococcus faecium/genetics ; Epitopes, T-Lymphocyte/genetics ; Humans ; Molecular Docking Simulation ; Vaccines, Subunit ; *Vaccinology ; Vancomycin Resistance ; }, abstract = {Enterococcus faecium is regarded as fourth most emerging common pathogen causing hospital acquired infections (HAIs), with high mortality rate, especially in children, elderly and immunocompromised patients. Recently, due to the emergence of E. 
faecium resistant strains especially vancomycin resistance (VRE) and their continuously growing resistivity to antibiotics, design of safe vaccine remains a choice for its control. Alternative control through vaccination has received much attention, but there is no clinically approved vaccine against this pathogen. Therefore, in current study we have applied a triple helix approach i.e., Pan-genome, subtractive genome and reverse vaccinology to identify and design potential vaccine candidates and multiepitope-based vaccine (MEV) construct against E. faecium (via core genome analysis from 216 strains). In this study, only 2 outer membrane proteins were identified through genome subtraction of resistant strains genes against human and essential proteins. Subsequently, phosphate ABC transporter substrate binding protein (Psts) was selected as a promiscuous vaccine candidate to develop a potent vaccine model. A final of four epitopes from CD8 + T-cell, CD4 + T-cell epitopes, and B-cell were shortlisted from outer membrane protein with highly antigenic, IFN-γ inducer, and overlapping characteristics for the construction of twelve vaccine models. The V3 construct was found to be highly immunogenic, non-toxic, non-allergenic, highly antigenic and most stable in terms of molecular docking and simulation studies against six HLAs, TLR2, and TLR4 complex. So far, this protein and multiepitope have never been characterized as vaccine targets against E. faecium. The current study proposed V3 as a significant vaccine candidate that could help the scientific community to treat E. 
faecium infections.}, } @article {pmid35481758, year = {2022}, author = {Wu, J and NicAogáin, K and McAuliffe, O and Jordan, K and O'Byrne, C}, title = {Phylogenetic and Phenotypic Analyses of a Collection of Food and Clinical Listeria monocytogenes Isolates Reveal Loss of Function of Sigma B from Several Clonal Complexes.}, journal = {Applied and environmental microbiology}, volume = {88}, number = {10}, pages = {e0005122}, pmid = {35481758}, issn = {1098-5336}, mesh = {*Bacterial Proteins/genetics ; Food Microbiology ; *Listeria monocytogenes/genetics ; Phenotype ; Phylogeny ; *Sigma Factor/genetics ; }, abstract = {To understand the molecular mechanisms that contribute to the stress responses of the important foodborne pathogen Listeria monocytogenes, we collected 139 strains (meat, n = 25; dairy, n = 10; vegetable, n = 8; seafood, n = 14; mixed food, n = 4; and food processing environments, n = 78), mostly isolated in Ireland, and subjected them to whole-genome sequencing. These strains were compared to 25 Irish clinical isolates and 4 well-studied reference strains. Core genome and pan-genome analysis confirmed a highly clonal and deeply branched population structure. Multilocus sequence typing showed that this collection contained a diverse range of strains from L. monocytogenes lineages I and II. Several groups of isolates with highly similar genome content were traced to single or multiple food business operators, providing evidence of strain persistence or prevalence, respectively. Phenotypic screening assays for tolerance to salt stress and resistance to acid stress revealed variants within several clonal complexes that were phenotypically distinct. Five of these phenotypic outliers were found to carry mutations in the sigB operon, which encodes the stress-inducible sigma factor sigma B. Transcriptional analysis confirmed that three of the strains that carried mutations in sigB, rsbV, or rsbU had reduced SigB activity, as predicted. 
These strains exhibited increased tolerance to salt stress and displayed decreased resistance to low pH stress. Overall, this study shows that loss-of-function mutations in the sigB operon are comparatively common in field isolates, probably reflecting the cost of the general stress response to reproductive fitness in this pathogen. IMPORTANCE The bacterial foodborne pathogen Listeria monocytogenes frequently contaminates various categories of food products and is able to cause life-threatening infections when ingested by humans. Thus, it is important to control the growth of this bacterium in food by understanding the mechanisms that allow its proliferation under suboptimal conditions. In this study, intraspecies heterogeneity in stress response was observed across a collection consisting of mainly Irish L. monocytogenes isolates. Through comparisons of genome sequence and phenotypes observed, we identified three strains with impairment of the general stress response regulator SigB. Two of these strains are used widely in food challenge studies for evaluating the growth potential of L. monocytogenes. Given that loss of SigB function is associated with atypical phenotypic properties, the use of these strains in food challenge studies should be re-evaluated.}, } @article {pmid35479110, year = {2022}, author = {de Sá, PHCG and Castro Alves, JT and Veras, AAO}, title = {Protocol to analyze the bacterial pangenome using PAN2HGENE software.}, journal = {STAR protocols}, volume = {3}, number = {2}, pages = {101327}, pmid = {35479110}, issn = {2666-1667}, mesh = {*Bacteria ; Genome ; Prokaryotic Cells ; *Software ; }, abstract = {The PAN2HGENE is a computational tool that enables two main analyses. First, the tool can identify gene products absent from the original prokaryotic genome sequence. Second, it enables automated comparative analysis for both complete and draft genomes. 
All analyses are performed through a simple and intuitive graphical user interface without the need for extensive and complex command lines. For complete details on the use and execution of this protocol, please refer to Silva de Oliveira (2021).}, } @article {pmid35478716, year = {2022}, author = {Ode, H and Nakata, Y and Nagashima, M and Hayashi, M and Yamazaki, T and Asakura, H and Suzuki, J and Kubota, M and Matsuoka, K and Matsuda, M and Mori, M and Sugimoto, A and Imahashi, M and Yokomaku, Y and Sadamasu, K and Iwatani, Y}, title = {Molecular epidemiological features of SARS-CoV-2 in Japan, 2020-1.}, journal = {Virus evolution}, volume = {8}, number = {1}, pages = {veac034}, pmid = {35478716}, issn = {2057-1577}, abstract = {There were five epidemic waves of coronavirus disease 2019 in Japan between 2020 and 2021. It remains unclear how the domestic waves arose and abated. To better understand this, we analyzed the pangenomic sequences of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) and characterized the molecular epidemiological features of the five epidemic waves in Japan. In this study, we performed deep sequencing to determine the pangenomic SARS-CoV-2 sequences of 1,286 samples collected in two cities far from each other, Tokyo Metropolis and Nagoya. Then, the spatiotemporal genetic changes of the obtained sequences were compared with the sequences available in the Global Initiative on Sharing All Influenza Data (GISAID) database. A total of 873 genotypes carrying different sets of mutations were identified in the five epidemic waves. Phylogenetic analysis demonstrated that sharp displacements of lineages and genotypes occurred between consecutive waves over the 2 years. In addition, a wide variety of genotypes were observed in the early half of each wave, whereas a few genotypes were detected across Japan during an entire wave. 
Phylogenetically, putative descendant genotypes observed late in each wave displayed regional clustering and evolution in Japan. The genetic diversity of SARS-CoV-2 displayed uneven dynamics during each epidemic wave in Japan. Our findings provide an important molecular epidemiological basis to aid in controlling future SARS-CoV-2 epidemics.}, } @article {pmid35476524, year = {2022}, author = {Norsigian, CJ and Danhof, HA and Brand, CK and Midani, FS and Broddrick, JT and Savidge, TC and Britton, RA and Palsson, BO and Spinler, JK and Monk, JM}, title = {Systems biology approach to functionally assess the Clostridioides difficile pangenome reveals genetic diversity with discriminatory power.}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {119}, number = {18}, pages = {e2119396119}, pmid = {35476524}, issn = {1091-6490}, support = {U01 AI124290/AI/NIAID NIH HHS/United States ; U01 AI124316/AI/NIAID NIH HHS/United States ; F32 AI136404/AI/NIAID NIH HHS/United States ; R01 AI123278/AI/NIAID NIH HHS/United States ; P30 DK056338/DK/NIDDK NIH HHS/United States ; 1U01AI12429//HHS | NIH | National Institute of Allergy and Infectious Diseases (NIAID)/ ; T32 DK007664/DK/NIDDK NIH HHS/United States ; }, mesh = {Clostridioides ; *Clostridioides difficile/genetics ; *Cross Infection ; Genetic Variation ; Humans ; Systems Biology ; }, abstract = {Combatting Clostridioides difficile infections, a dominant cause of hospital-associated infections with incidence and resulting deaths increasing worldwide, is complicated by the frequent emergence of new virulent strains. Here, we employ whole-genome sequencing, high-throughput phenotypic screenings, and genome-scale models of metabolism to evaluate the genetic diversity of 451 strains of C. difficile. Constructing the C. 
difficile pangenome based on this set revealed 9,924 distinct gene clusters, of which 2,899 (29%) are defined as core, 2,968 (30%) are defined as unique, and the remaining 4,057 (41%) are defined as accessory. We develop a strain typing method, sequence typing by accessory genome (STAG), that identifies 176 genetically distinct groups of strains and allows for explicit interrogation of accessory gene content. Thirty-five strains representative of the overall set were experimentally profiled on 95 different nutrient sources, revealing 26 distinct growth profiles and unique nutrient preferences; 451 strain-specific genome scale models of metabolism were constructed, allowing us to computationally probe phenotypic diversity in 28,864 unique conditions. The models create a mechanistic link between the observed phenotypes and strain-specific genetic differences and exhibit an ability to correctly predict growth in 76% of measured cases. The typing and model predictions are used to identify and contextualize discriminating genetic features and phenotypes that may contribute to the emergence of new problematic strains.}, } @article {pmid35475644, year = {2022}, author = {Adomako, M and Ernst, D and Simkovsky, R and Chao, YY and Wang, J and Fang, M and Bouchier, C and Lopez-Igual, R and Mazel, D and Gugger, M and Golden, SS}, title = {Comparative Genomics of Synechococcus elongatus Explains the Phenotypic Diversity of the Strains.}, journal = {mBio}, volume = {13}, number = {3}, pages = {e0086222}, pmid = {35475644}, issn = {2150-7511}, support = {F32 GM130070/GM/NIGMS NIH HHS/United States ; R35 GM118290/GM/NIGMS NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; }, mesh = {Bacterial Proteins/genetics/metabolism ; Genomics ; Phenotype ; Photosynthesis ; *Synechococcus/metabolism ; }, abstract = {Strains of the freshwater cyanobacterium Synechococcus elongatus were first isolated approximately 60 years ago, and PCC 7942 is well established as a 
model for photosynthesis, circadian biology, and biotechnology research. The recent isolation of UTEX 3055 and subsequent discoveries in biofilm and phototaxis phenotypes suggest that lab strains of S. elongatus are highly domesticated. We performed a comprehensive genome comparison among the available genomes of S. elongatus and sequenced two additional laboratory strains to trace the loss of native phenotypes from the standard lab strains and determine the genetic basis of useful phenotypes. The genome comparison analysis provides a pangenome description of S. elongatus, as well as correction of extensive errors in the published sequence for the type strain PCC 6301. The comparison of gene sets and single nucleotide polymorphisms (SNPs) among strains clarifies strain isolation histories and, together with large-scale genome differences, supports a hypothesis of laboratory domestication. Prophage genes in laboratory strains, but not UTEX 3055, affect pigmentation, while unique genes in UTEX 3055 are necessary for phototaxis. The genomic differences identified in this study include previously reported SNPs that are, in reality, sequencing errors, as well as SNPs and genome differences that have phenotypic consequences. One SNP in the circadian response regulator rpaA that has caused confusion is clarified here as belonging to an aberrant clone of PCC 7942, used for the published genome sequence, that has confounded the interpretation of circadian fitness research. IMPORTANCE Synechococcus elongatus is a versatile and robust model cyanobacterium for photosynthetic metabolism and circadian biology research, with utility as a biological production platform. We compared the genomes of closely related S. elongatus strains to create a pangenome annotation to aid gene discovery for novel phenotypes. The comparative genomic analysis revealed the need for a new sequence of the species type strain PCC 6301 and includes two new sequences for S. 
elongatus strains PCC 6311 and PCC 7943. The genomic comparison revealed a pattern of early laboratory domestication of strains, clarifies the relationship between the strains PCC 6301 and UTEX 2973, and showed that differences in large prophage regions, operons, and even single nucleotides have effects on phenotypes as wide-ranging as pigmentation, phototaxis, and circadian gene expression.}, } @article {pmid35474671, year = {2021}, author = {Ferrés, I and Iraola, G}, title = {An object-oriented framework for evolutionary pangenome analysis.}, journal = {Cell reports methods}, volume = {1}, number = {5}, pages = {100085}, pmid = {35474671}, issn = {2667-2375}, mesh = {*Genomics ; *Ecosystem ; Genome, Bacterial/genetics ; Biological Evolution ; Escherichia coli/genetics ; }, abstract = {Pangenome analysis is fundamental to explore molecular evolution occurring in bacterial populations. Here, we introduce Pagoo, an R framework that enables straightforward handling of pangenome data. The encapsulated nature of Pagoo allows the storage of complex molecular and phenotypic information using an object-oriented approach. This facilitates to go back and forward to the data using a single programming environment and saving any stage of analysis (including the raw data) in a single file, making it sharable and reproducible. Pagoo provides tools to query, subset, compare, visualize, and perform statistical analyses, in concert with other microbial genomics packages available in the R ecosystem. 
As working examples, we used 1,000 Escherichia coli genomes to show that Pagoo is scalable, and a global dataset of Campylobacter fetus genomes to identify evolutionary patterns and genomic markers of host-adaptation in this pathogen.}, } @article {pmid35469019, year = {2022}, author = {Rhodes, J and Abdolrasouli, A and Dunne, K and Sewell, TR and Zhang, Y and Ballard, E and Brackin, AP and van Rhijn, N and Chown, H and Tsitsopoulou, A and Posso, RB and Chotirmall, SH and McElvaney, NG and Murphy, PG and Talento, AF and Renwick, J and Dyer, PS and Szekely, A and Bowyer, P and Bromley, MJ and Johnson, EM and Lewis White, P and Warris, A and Barton, RC and Schelenz, S and Rogers, TR and Armstrong-James, D and Fisher, MC}, title = {Population genomics confirms acquisition of drug-resistant Aspergillus fumigatus infection by humans from the environment.}, journal = {Nature microbiology}, volume = {7}, number = {5}, pages = {663--674}, pmid = {35469019}, issn = {2058-5276}, support = {219551/Z/19/Z/WT_/Wellcome Trust/United Kingdom ; MR/N006364/2/MRC_/Medical Research Council/United Kingdom ; 097377/WT_/Wellcome Trust/United Kingdom ; /DH_/Department of Health/United Kingdom ; MR/V033417/1/MRC_/Medical Research Council/United Kingdom ; MR/R015600/1/MRC_/Medical Research Council/United Kingdom ; /WT_/Wellcome Trust/United Kingdom ; MR/V037315/1/MRC_/Medical Research Council/United Kingdom ; BB/M010996/1/BB_/Biotechnology and Biological Sciences Research Council/United Kingdom ; }, mesh = {*Anti-Infective Agents ; *Aspergillus fumigatus/genetics ; Azoles/pharmacology ; Drug Resistance, Fungal/genetics ; Humans ; Metagenomics ; Microbial Sensitivity Tests ; }, abstract = {Infections caused by the fungal pathogen Aspergillus fumigatus are increasingly resistant to first-line azole antifungal drugs. However, despite its clinical importance, little is known about how susceptible patients acquire infection from drug-resistant genotypes in the environment. 
Here, we present a population genomic analysis of 218 A. fumigatus isolates from across the UK and Ireland (comprising 153 clinical isolates from 143 patients and 65 environmental isolates). First, phylogenomic analysis shows strong genetic structuring into two clades (A and B) with little interclade recombination and the majority of environmental azole resistance found within clade A. Second, we show occurrences where azole-resistant isolates of near-identical genotypes were obtained from both environmental and clinical sources, indicating with high confidence the infection of patients with resistant isolates transmitted from the environment. Third, genome-wide scans identified selective sweeps across multiple regions indicating a polygenic basis to the trait in some genetic backgrounds. These signatures of positive selection are seen for loci containing the canonical genes encoding fungicide resistance in the ergosterol biosynthetic pathway, while other regions under selection have no defined function. Lastly, pan-genome analysis identified genes linked to azole resistance and previously unknown resistance mechanisms. 
Understanding the environmental drivers and genetic basis of evolving fungal drug resistance needs urgent attention, especially in light of increasing numbers of patients with severe viral respiratory tract infections who are susceptible to opportunistic fungal superinfections.}, } @article {pmid35469007, year = {2022}, author = {Shi, YM and Hirschmann, M and Shi, YN and Ahmed, S and Abebew, D and Tobias, NJ and Grün, P and Crames, JJ and Pöschel, L and Kuttenlochner, W and Richter, C and Herrmann, J and Müller, R and Thanwisai, A and Pidot, SJ and Stinear, TP and Groll, M and Kim, Y and Bode, HB}, title = {Global analysis of biosynthetic gene clusters reveals conserved and unique natural products in entomopathogenic nematode-symbiotic bacteria.}, journal = {Nature chemistry}, volume = {14}, number = {6}, pages = {701--712}, pmid = {35469007}, issn = {1755-4349}, mesh = {Animals ; *Biological Products ; Humans ; Insecta/genetics/microbiology ; Multigene Family ; *Nematoda/genetics/microbiology ; *Photorhabdus/genetics ; Symbiosis/genetics ; *Xenorhabdus/genetics ; }, abstract = {Microorganisms contribute to the biology and physiology of eukaryotic hosts and affect other organisms through natural products. Xenorhabdus and Photorhabdus (XP) living in mutualistic symbiosis with entomopathogenic nematodes generate natural products to mediate bacteria-nematode-insect interactions. However, a lack of systematic analysis of the XP biosynthetic gene clusters (BGCs) has limited the understanding of how natural products affect interactions between the organisms. Here we combine pangenome and sequence similarity networks to analyse BGCs from 45 XP strains that cover all sequenced strains in our collection and represent almost all XP taxonomy. The identified 1,000 BGCs belong to 176 families. The most conserved families are denoted by 11 BGC classes. We homologously (over)express the ubiquitous and unique BGCs and identify compounds featuring unusual architectures. 
The bioactivity evaluation demonstrates that the prevalent compounds are eukaryotic proteasome inhibitors, virulence factors against insects, metallophores and insect immunosuppressants. These findings explain the functional basis of bacterial natural products in this tripartite relationship.}, } @article {pmid35464848, year = {2022}, author = {Jha, UC and Nayyar, H and Parida, SK and Bakır, M and von Wettberg, EJB and Siddique, KHM}, title = {Progress of Genomics-Driven Approaches for Sustaining Underutilized Legume Crops in the Post-Genomic Era.}, journal = {Frontiers in genetics}, volume = {13}, number = {}, pages = {831656}, pmid = {35464848}, issn = {1664-8021}, abstract = {Legume crops, belonging to the Fabaceae family, are of immense importance for sustaining global food security. Many legumes are profitable crops for smallholder farmers due to their unique ability to fix atmospheric nitrogen and their intrinsic ability to thrive on marginal land with minimum inputs and low cultivation costs. Recent progress in genomics shows promise for future genetic gains in major grain legumes. Still it remains limited in minor legumes/underutilized legumes, including adzuki bean, cluster bean, horse gram, lathyrus, red clover, urd bean, and winged bean. In the last decade, unprecedented progress in completing genome assemblies of various legume crops and resequencing efforts of large germplasm collections has helped to identify the underlying gene(s) for various traits of breeding importance for enhancing genetic gain and contributing to developing climate-resilient cultivars. This review discusses the progress of genomic resource development, including genome-wide molecular markers, key breakthroughs in genome sequencing, genetic linkage maps, and trait mapping for facilitating yield improvement in underutilized legumes. 
We focus on 1) the progress in genomic-assisted breeding, 2) the role of whole-genome resequencing, pangenomes for underpinning the novel genomic variants underlying trait gene(s), 3) how adaptive traits of wild underutilized legumes could be harnessed to develop climate-resilient cultivars, 4) the progress and status of functional genomics resources, deciphering the underlying trait candidate genes with putative function in underutilized legumes 5) and prospects of novel breeding technologies, such as speed breeding, genomic selection, and genome editing. We conclude the review by discussing the scope for genomic resources developed in underutilized legumes to enhance their production and play a critical role in achieving the "zero hunger" sustainable development goal by 2030 set by the United Nations.}, } @article {pmid35456775, year = {2022}, author = {Lau Vetter, MCY and Huang, B and Fenske, L and Blom, J}, title = {Metabolism of the Genus Guyparkeria Revealed by Pangenome Analysis.}, journal = {Microorganisms}, volume = {10}, number = {4}, pages = {}, pmid = {35456775}, issn = {2076-2607}, support = {2018YFC0309904-02//National Key R&D Program of China/ ; }, abstract = {Halophilic sulfur-oxidizing bacteria belonging to the genus Guyparkeria occur at both marine and terrestrial habitats. Common physiological characteristics displayed by Guyparkeria isolates have not yet been linked to the metabolic potential encoded in their genetic inventory. To provide a genetic basis for understanding the metabolism of Guyparkeria, nine genomes were compared to reveal the metabolic capabilities and adaptations. 
A detailed account is given on Guyparkeria's ability to assimilate carbon by fixation, to oxidize reduced sulfur, to oxidize thiocyanate, and to cope with salinity stress.}, } @article {pmid35456751, year = {2022}, author = {Néron, B and Littner, E and Haudiquet, M and Perrin, A and Cury, J and Rocha, EPC}, title = {IntegronFinder 2.0: Identification and Analysis of Integrons across Bacteria, with a Focus on Antibiotic Resistance in Klebsiella.}, journal = {Microorganisms}, volume = {10}, number = {4}, pages = {}, pmid = {35456751}, issn = {2076-2607}, support = {ANR-16-CONV-0005//Agence Nationale de la Recherche/ ; ANR-10-LABX-62-IBEID//Agence Nationale de la Recherche/ ; EQU201903007835//Fondation pour la Recherche Médicale/ ; }, abstract = {Integrons are flexible gene-exchanging platforms that contain multiple cassettes encoding accessory genes whose order is shuffled by a specific integrase. Integrons embedded within mobile genetic elements often contain multiple antibiotic resistance genes that they spread among nosocomial pathogens and contribute to the current antibiotic resistance crisis. However, most integrons are presumably sedentary and encode a much broader diversity of functions. IntegronFinder is a widely used software to identify novel integrons in bacterial genomes, but has aged and lacks some useful functionalities to handle very large datasets of draft genomes or metagenomes. Here, we present IntegronFinder version 2. We have updated the code, improved its efficiency and usability, adapted the output to incomplete genome data, and added a few novel functions. We describe these changes and illustrate the relevance of the program by analyzing the distribution of integrons across more than 20,000 fully sequenced genomes. We also take full advantage of its novel capabilities to analyze close to 4000 Klebsiella pneumoniae genomes for the presence of integrons and antibiotic resistance genes within them. Our data show that K. 
pneumoniae has a large diversity of integrons and the largest mobile integron in our database of plasmids. The pangenome of these integrons contains a total of 165 different gene families with most of the largest families being related with resistance to numerous types of antibiotics. IntegronFinder is a free and open-source software available on multiple public platforms.}, } @article {pmid35456404, year = {2022}, author = {Aggarwal, SK and Singh, A and Choudhary, M and Kumar, A and Rakshit, S and Kumar, P and Bohra, A and Varshney, RK}, title = {Pangenomics in Microbial and Crop Research: Progress, Applications, and Perspectives.}, journal = {Genes}, volume = {13}, number = {4}, pages = {}, pmid = {35456404}, issn = {2073-4425}, mesh = {Chromosome Mapping ; Humans ; *Plant Breeding ; *Plants/genetics ; }, abstract = {Advances in sequencing technologies and bioinformatics tools have fueled a renewed interest in whole genome sequencing efforts in many organisms. The growing availability of multiple genome sequences has advanced our understanding of the within-species diversity, in the form of a pangenome. Pangenomics has opened new avenues for future research such as allowing dissection of complex molecular mechanisms and increased confidence in genome mapping. To comprehensively capture the genetic diversity for improving plant performance, the pangenome concept is further extended from species to genus level by the inclusion of wild species, constituting a super-pangenome. Characterization of pangenome has implications for both basic and applied research. The concept of pangenome has transformed the way biological questions are addressed. From understanding evolution and adaptation to elucidating host-pathogen interactions, finding novel genes or breeding targets to aid crop improvement to design effective vaccines for human prophylaxis, the increasing availability of the pangenome has revolutionized several aspects of biological research. 
The future availability of high-resolution pangenomes based on reference-level near-complete genome assemblies would greatly improve our ability to address complex biological problems.}, } @article {pmid35451954, year = {2022}, author = {Yu, J and Xu, X and Wang, Y and Zhai, X and Pan, Z and Jiao, X and Zhang, Y}, title = {Prophage-mediated genome differentiation of the Salmonella Derby ST71 population.}, journal = {Microbial genomics}, volume = {8}, number = {4}, pages = {}, pmid = {35451954}, issn = {2057-5858}, mesh = {Animals ; *Genome, Bacterial/genetics ; Niacinamide/analogs & derivatives ; Poultry/genetics ; *Prophages/genetics ; Salmonella/genetics ; Swine ; }, abstract = {Although Salmonella Derby ST71 strains have been recognized as poultry-specific by previous studies, multiple swine-associated S. Derby ST71 strains were identified in this long-term, multi-site epidemic study. Here, 15 representative swine-associated S. Derby ST71 strains were sequenced and compared with 65 (one swine-associated and 64 poultry-associated) S. Derby ST71 strains available in the NCBI database at a pangenomic level through comparative genomics analysis to identify genomic features related to the differentiation of swine-associated strains and previously reported poultry-associated strains. The distribution patterns of known Salmonella pathogenicity islands (SPIs) and virulence factor (VF) encoding genes were not capable of differentiating between the two strain groups. The results demonstrated that the S. Derby ST71 population harbours an open pan-genome, and swine-associated ST71 strains contain many more genes than the poultry-associated strains, mainly attributed to the prophage sequence contents in the genomes. The numbers of prophage sequences identified in the swine-associated strains were higher than those in the poultry-associated strains. 
Prophages specifically harboured by the swine-associated strains were found to contain genes that facilitate niche adaptation for the bacterial hosts. Gene deletion experiments revealed that the dam gene specifically present in the prophage of the swine-associated strains is important for S. Derby to adhere onto the host cells. This study provides novel insights into the roles of prophages during the genome differentiation of Salmonella.}, } @article {pmid35447921, year = {2022}, author = {Jv, Y and Xi, C and Zhao, Y and Wang, W and Zhang, Y and Liu, K and Liu, W and Shan, K and Wang, C and Cao, R and Dai, C and Jv, Y and Zhu, W and Wang, H and He, Q and Hao, L}, title = {Pan-Genomic and Transcriptomic Analyses of Marine Pseudoalteromonas agarivorans Hao 2018 Revealed Its Genomic and Metabolic Features.}, journal = {Marine drugs}, volume = {20}, number = {4}, pages = {}, pmid = {35447921}, issn = {1660-3397}, support = {2020KJC-ZD08//Science, Education and Industry Integration Innovation Pilot Project of Qilu University of Tech-nology (Shandong Academy of Sciences)/ ; 2020KJC-ZD10//Science, Education and Industry Integration Innovation Pilot Project of Qilu University of Tech-nology (Shandong Academy of Sciences)/ ; No. 202002//Foundation of Qilu University of Technology of ESI Cultivating Subject for Biology and Biochemistry/ ; No. 2019JZZY021020//Foundation of Key R&D Program of Shandong Province/ ; ZR2021ZD29//Major Basic Research Project of Natural Science Foundation of Shandong Province/ ; No. 
ZZ20190302//Foundation of State Key Laboratory of Biobased Material and Green Papermaking/ ; SWCG 2018-01//Foundation of Shandong Provincial Key Laboratory of Biosensors/ ; ZR2012CM019//Natural Science Foundation of Shandong Province/ ; }, mesh = {Carbohydrates ; Genome, Bacterial/genetics ; Genomics ; Phylogeny ; *Pseudoalteromonas/genetics/metabolism ; *Transcriptome ; }, abstract = {The genomic and carbohydrate metabolic features of Pseudoalteromonas agarivorans Hao 2018 (P. agarivorans Hao 2018) were investigated through pan-genomic and transcriptomic analyses, and key enzyme genes that may encode the process involved in its extracellular polysaccharide synthesis were screened. The pan-genome of the P. agarivorans strains consists of a core-genome containing 2331 genes, an accessory-genome containing 956 genes, and a unique-genome containing 1519 genes. Clusters of Orthologous Groups analyses showed that P. agarivorans harbors strain-specifically diverse metabolisms, probably representing high evolutionary genome changes. The Kyoto Encyclopedia of Genes and Genomes and reconstructed carbohydrate metabolic pathways displayed that P. agarivorans strains can utilize a variety of carbohydrates, such as d-glucose, d-fructose, and d-lactose. Analyses of differentially expressed genes showed that compared with the stationary phase (24 h), strain P. agarivorans Hao 2018 had upregulated expression of genes related to the synthesis of extracellular polysaccharides in the logarithmic growth phase (2 h), and that the expression of these genes affected extracellular polysaccharide transport, nucleotide sugar synthesis, and glycosyltransferase synthesis. This is the first investigation of the genomic and metabolic features of P. 
agarivorans through pan-genomic and transcriptomic analyses, and these intriguing discoveries provide the possibility to produce novel marine drug lead compounds with high biological activity.}, } @article {pmid35446150, year = {2022}, author = {Liu, Y and Pei, T and Du, J and Yao, Q and Deng, MR and Zhu, H}, title = {Comparative Genomics Reveals Genetic Diversity and Metabolic Potentials of the Genus Qipengyuania and Suggests Fifteen Novel Species.}, journal = {Microbiology spectrum}, volume = {10}, number = {3}, pages = {e0126421}, pmid = {35446150}, issn = {2165-0497}, mesh = {Carotenoids ; DNA ; Genetic Variation ; *Genome, Bacterial/genetics ; *Genomics ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; }, abstract = {Members of the genus Qipengyuania are heterotrophic bacteria frequently isolated from marine environments with great application potential in areas such as carotenoid production. However, the genomic diversity, metabolic function, and adaption of this genus remain largely unclear. Here, 16 isolates related to the genus Qipengyuania were recovered from coastal samples and their genomes were sequenced. The phylogenetic inference of these isolates and reference type strains of this genus indicated that the 16S rRNA gene was insufficient to distinguish them at the species level; instead, the phylogenomic reconstruction could provide the reliable phylogenetic relationships and confirm 15 new well-supported branches, representing 15 putative novel genospecies corroborated by the digital DNA-DNA hybridization and average nucleotide identity analyses. Comparative genomics revealed that the genus Qipengyuania had an open pangenome and possessed multiple conserved genes and pathways related to metabolic functions and environmental adaptation, despite the presence of divergent genomic features and specific metabolic potential. 
Genetic analysis and pigment detection showed that the members of this genus were identified as carotenoid producers, while some proved to be potentially aerobic anoxygenic photoheterotrophs. Collectively, the first insight into the genetic diversity and metabolic potentials of the genus Qipengyuania will contribute to better understanding of the speciation and adaptive evolution in natural environments. IMPORTANCE The deciphering of the phylogenetic diversity and metabolic features of the abundant bacterial taxa is critical for exploring their ecological importance and application potential. Qipengyuania is a genus of frequently isolated heterotrophic microorganisms with great industrial application potential. Numerous strains related to the genus Qipengyuania have been isolated from diverse environments, but their genomic diversity and metabolic functions remain unclear. Our study revealed a high degree of genetic diversity, metabolic versatility, and environmental adaptation of the genus Qipengyuania using comparative genomics. Fifteen novel species of this genus have been established using a polyphasic taxonomic approach, expanding the number of described species to almost double. 
This study provided an overall view of the genus Qipengyuania at the genomic level and will enable us to better uncover its ecological roles and evolutionary history.}, } @article {pmid35444317, year = {2022}, author = {Wang, T and Antonacci-Fulton, L and Howe, K and Lawson, HA and Lucas, JK and Phillippy, AM and Popejoy, AB and Asri, M and Carson, C and Chaisson, MJP and Chang, X and Cook-Deegan, R and Felsenfeld, AL and Fulton, RS and Garrison, EP and Garrison, NA and Graves-Lindsay, TA and Ji, H and Kenny, EE and Koenig, BA and Li, D and Marschall, T and McMichael, JF and Novak, AM and Purushotham, D and Schneider, VA and Schultz, BI and Smith, MW and Sofia, HJ and Weissman, T and Flicek, P and Li, H and Miga, KH and Paten, B and Jarvis, ED and Hall, IM and Eichler, EE and Haussler, D}, title = {The Human Pangenome Project: a global resource to map genomic diversity.}, journal = {Nature}, volume = {604}, number = {7906}, pages = {437--446}, pmid = {35444317}, issn = {1476-4687}, support = {U01 HG010961/HG/NHGRI NIH HHS/United States ; U41 HG010972/HG/NHGRI NIH HHS/United States ; U01 HG010973/HG/NHGRI NIH HHS/United States ; U01 HG010963/HG/NHGRI NIH HHS/United States ; U01 HG010971/HG/NHGRI NIH HHS/United States ; }, mesh = {*Genome, Human/genetics ; *Genomics ; Haplotypes/genetics ; High-Throughput Nucleotide Sequencing ; Humans ; Sequence Analysis, DNA ; }, abstract = {The human reference genome is the most widely used resource in human genetics and is due for a major update. Its current structure is a linear composite of merged haplotypes from more than 20 people, with a single individual comprising most of the sequence. It contains biases and errors within a framework that does not represent global human genomic variation. A high-quality reference with global representation of common variants, including single-nucleotide variants, structural variants and functional elements, is needed. 
The Human Pangenome Reference Consortium aims to create a more sophisticated and complete human reference genome with a graph-based, telomere-to-telomere representation of global genomic diversity. Here we leverage innovations in technology, study design and global partnerships with the goal of constructing the highest-possible quality human pangenome reference. Our goal is to improve data representation and streamline analyses to enable routine assembly of complete diploid genomes. With attention to ethical frameworks, the human pangenome reference will contain a more accurate and diverse representation of global genomic variation, improve gene-disease association studies across populations, expand the scope of genomics research to the most repetitive and polymorphic regions of the genome, and serve as the ultimate genetic resource for future biomedical research and precision medicine.}, } @article {pmid35440059, year = {2022}, author = {Ferrero-Serrano, Á and Sylvia, MM and Forstmeier, PC and Olson, AJ and Ware, D and Bevilacqua, PC and Assmann, SM}, title = {Experimental demonstration and pan-structurome prediction of climate-associated riboSNitches in Arabidopsis.}, journal = {Genome biology}, volume = {23}, number = {1}, pages = {101}, pmid = {35440059}, issn = {1474-760X}, mesh = {*Arabidopsis/genetics ; Climate ; Genome, Plant ; Genome-Wide Association Study ; Polymorphism, Single Nucleotide ; RNA, Messenger ; }, abstract = {BACKGROUND: Genome-wide association studies (GWAS) aim to correlate phenotypic changes with genotypic variation. Upon transcription, single nucleotide variants (SNVs) may alter mRNA structure, with potential impacts on transcript stability, macromolecular interactions, and translation. However, plant genomes have not been assessed for the presence of these structure-altering polymorphisms or "riboSNitches."

RESULTS: We experimentally demonstrate the presence of riboSNitches in transcripts of two Arabidopsis genes, ZINC RIBBON 3 (ZR3) and COTTON GOLGI-RELATED 3 (CGR3), which are associated with continentality and temperature variation in the natural environment. These riboSNitches are also associated with differences in the abundance of their respective transcripts, implying a role in regulating the gene's expression in adaptation to local climate conditions. We then computationally predict riboSNitches transcriptome-wide in mRNAs of 879 naturally inbred Arabidopsis accessions. We characterize correlations between SNPs/riboSNitches in these accessions and 434 climate descriptors of their local environments, suggesting a role of these variants in local adaptation. We integrate this information in CLIMtools V2.0 and provide a new web resource, T-CLIM, that reveals associations between transcript abundance variation and local environmental variation.

CONCLUSION: We functionally validate two plant riboSNitches and, for the first time, demonstrate riboSNitch conditionality dependent on temperature, coining the term "conditional riboSNitch." We provide the first pan-genome-wide prediction of riboSNitches in plants. We expand our previous CLIMtools web resource with riboSNitch information and with 1868 additional Arabidopsis genomes and 269 additional climate conditions, which will greatly facilitate in silico studies of natural genetic variation, its phenotypic consequences, and its role in local adaptation.}, } @article {pmid35435457, year = {2022}, author = {Belaouni, HA and Compant, S and Antonielli, L and Nikolic, B and Zitouni, A and Sessitsch, A}, title = {In-depth genome analysis of Bacillus sp. BH32, a salt stress-tolerant endophyte obtained from a halophyte in a semiarid region.}, journal = {Applied microbiology and biotechnology}, volume = {106}, number = {8}, pages = {3113--3137}, pmid = {35435457}, issn = {1432-0614}, mesh = {*Bacillus/genetics ; DNA ; Endophytes/genetics ; *Solanum lycopersicum/microbiology ; Salt Stress ; Salt-Tolerant Plants ; Triticum/microbiology ; }, abstract = {Endophytic strains belonging to the Bacillus cereus group were isolated from the halophytes Atriplex halimus L. (Amaranthaceae) and Tamarix aphylla L. (Tamaricaceae) from costal and continental regions in Algeria. Based on their salt tolerance (up to 5%), the strains were tested for their ability to alleviate salt stress in tomato and wheat. Bacillus sp. strain BH32 showed the highest potential to reduce salinity stress (up to + 50% and + 58% of dry weight improvement, in tomato and wheat, respectively, compared to the control). To determine putative mechanisms involved in salt tolerance and plant growth promotion, the whole genome of Bacillus sp. BH32 was sequenced, annotated, and used for comparative genomics against the genomes of closely related strains. The pangenome of Bacillus sp. 
BH32 and its closest relative was further analyzed. The phylogenomic analyses confirmed its taxonomic position, a member of the Bacillus cereus group, with intergenomic distances (GBDP analysis) pinpointing to a new taxon (digital DNA-DNA hybridization, dDDH < 70%). Genome mining unveiled several genes involved in stress tolerance, production of anti-oxidants and genes involved in plant growth promotion as well as in the production of secondary metabolites. KEY POINTS : • Bacillus sp. BH32 and other bacterial endophytes were isolated from halophytes, to be tested on tomato and wheat and to limit salt stress adverse effects. • The strain with the highest potential was then studied at the genomic level to highlight numerous genes linked to plant growth promotion and stress tolerance. • Pangenome approaches suggest that the strain belongs to a new taxon within the Bacillus cereus group.}, } @article {pmid35433080, year = {2022}, author = {Li, Z and Li, Z and Peng, Y and Lu, X and Kan, B}, title = {Trans-Regional and Cross-Host Spread of mcr-Carrying Plasmids Revealed by Complete Plasmid Sequences - 44 Countries, 1998-2020.}, journal = {China CDC weekly}, volume = {4}, number = {12}, pages = {242-248}, pmid = {35433080}, issn = {2096-7071}, abstract = {BACKGROUND: The surveillance of antimicrobial resistance genes (ARGs) and bacteria is one critical approach to prevent and control antimicrobial resistance (AMR). Next-generation sequencing (NGS) is a powerful tool in monitoring the emergence and spread of ARGs and resistant bacteria. The horizontal transfer of ARGs across host bacteria mediated by plasmids is a challenge in NGS surveillance for resistance because short-read sequencing can hardly generate the complete plasmid genome sequence, and the correlation between ARGs and plasmids is difficult to determine.

METHODS: The complete genome sequences of 455 mcr-carrying plasmids (pMCRs), and the data of their host bacteria and isolation regions were collected from the NCBI database. Genes of Inc types and ARGs were searched for each plasmid. The genome similarity of these plasmids was analyzed by pangenome clustering and genome alignment.

RESULTS: A total of 52 Inc types, including a variety of fusion plasmids containing 2 or more Inc types were identified in these pMCRs and carried by complex host bacteria. The cooccurrence of ARGs in pMCRs was generally observed, with an average of 3.9 ARGs per plasmid. Twenty-two clusters with consistent or highly similar sequences and gene compositions were identified by the pangenome clustering, which were characterized with distributions in different countries/regions, years or host bacteria in each cluster.

DISCUSSION: Based on the complete plasmid sequences, distribution of mcr genes in different Inc type plasmids, their co-existence with other AMRs, and transmission of one pMCR across regions and host bacteria can be revealed definitively. Complete plasmid genomes and comparisons in the laboratory network are necessary for spread tracing of ARG-carrying plasmids and risk assessment in AMR surveillance.}, } @article {pmid35432229, year = {2022}, author = {Yuan, PB and Zhan, Y and Zhu, JH and Ling, JH and Chen, EZ and Liu, WT and Wang, LJ and Zhong, YX and Chen, DQ}, title = {Pan-Genome Analysis of Laribacter hongkongensis: Virulence Gene Profiles, Carbohydrate-Active Enzyme Prediction, and Antimicrobial Resistance Characterization.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {862776}, pmid = {35432229}, issn = {1664-302X}, abstract = {Laribacter hongkongensis is a new emerging foodborne pathogen that causes community-acquired gastroenteritis and traveler's diarrhea. However, the genetic features of L. hongkongensis have not yet been properly understood. A total of 45 aquatic animal-associated L. hongkongensis strains isolated from intestinal specimens of frogs and grass carps were subjected to whole-genome sequencing (WGS), along with the genome data of 4 reported human clinical strains, the analysis of virulence genes, carbohydrate-active enzymes, and antimicrobial resistance (AMR) determinants were carried out for comprehensively understanding of this new foodborne pathogen. Human clinical strains were genetically more related to some strains from frogs inferred from phylogenetic trees. The distribution of virulence genes and carbohydrate-active enzymes exhibited different patterns among strains of different sources, reflecting their adaption to different host environments and indicating different potentials to infect humans. 
Thirty-two AMR genes were detected, susceptibility to 18 clinically used antibiotics including aminoglycoside, chloramphenicol, trimethoprim, and sulfa was checked to evaluate the availability of clinical medicines. Resistance to Rifampicin, Cefazolin, ceftazidime, Ampicillin, and ceftriaxone is prevalent in most strains, resistance to tetracycline, trimethoprim-sulfamethoxazole, ciprofloxacin, and levofloxacin are aggregated in nearly half of frog-derived strains, suggesting that drug resistance of frog-derived strains is more serious, and clinical treatment for L. hongkongensis infection should be more cautious.}, } @article {pmid35430877, year = {2022}, author = {Xu, S and Wei, M and Li, G and Li, Z and Che, Y and Han, L and Jia, W and Li, F and Li, D and Li, Z}, title = {Comprehensive Analysis of the Nocardia cyriacigeorgica Complex Reveals Five Species-Level Clades with Different Evolutionary and Pathogenicity Characteristics.}, journal = {mSystems}, volume = {7}, number = {3}, pages = {e0140621}, pmid = {35430877}, issn = {2379-5077}, mesh = {Humans ; Virulence/genetics ; Phylogeny ; *Nocardia/genetics ; Virulence Factors/genetics ; }, abstract = {Nocardia cyriacigeorgica is a common etiological agent of nocardiosis that has increasingly been implicated in serious pulmonary infections, especially in immunocompromised individuals. However, the evolution, diversity, and pathogenesis of N. cyriacigeorgica have remained unclear. Here, we performed a comparative genomic analysis using 91 N. cyriacigeorgica strains, 45 of which were newly sequenced in this study. Phylogenetic and average nucleotide identity (ANI) analyses revealed that N. cyriacigeorgica contained five species-level clades (8.6 to 14.6% interclade genetic divergence), namely, the N. cyriacigeorgica complex (NCC). 
Further pan-genome analysis revealed extensive differences among the five clades in nine functional categories, such as energy production, lipid metabolism, secondary metabolites, and signal transduction mechanisms. All 2,935 single-copy core genes undergoing purifying selection were highly conserved across NCC. However, clades D and E exhibited reduced selective constraints, compared to clades A to C. Horizontal gene transfer (HGT) and mobile genetic elements contributed to genomic plasticity, and clades A and B had experienced a higher level of HGT events than other clades. A total of 129 virulence factors were ubiquitous across NCC, such as the mce operon, hemolysin, and type VII secretion system (T7SS). However, different distributions of three toxin-coding genes and two new types of mce operons were detected, which might contribute to pathogenicity differences among the members of the NCC. Overall, our study provides comprehensive insights into the evolution, genetic diversity, and pathogenicity of NCC, facilitating the prevention of infections. IMPORTANCE Nocardia species are opportunistic bacterial pathogens that can affect all organ systems, primarily the skin, lungs, and brain. N. cyriacigeorgica is the most prevalent species within the genus, exhibits clinical significance, and can cause severe infections when disseminated throughout the body. However, the evolution, diversity, and pathogenicity of N. cyriacigeorgica remain unclear. Here, we have conducted a comparative genomic analysis of 91 N. cyriacigeorgica strains and revealed that N. cyriacigeorgica is not a single species but is composed of five closely related species. In addition, we discovered that these five species differ in many ways, involving selection pressure, horizontal gene transfer, functional capacity, pathogenicity, and antibiotic resistance. 
Overall, our work provides important clues in dissecting the evolution, genetic diversity, and pathogenicity of NCC, thereby advancing prevention measures against these infections.}, } @article {pmid35428201, year = {2022}, author = {Yang, MR and Wu, YW}, title = {Enhancing predictions of antimicrobial resistance of pathogens by expanding the potential resistance gene repertoire using a pan-genome-based feature selection approach.}, journal = {BMC bioinformatics}, volume = {23}, number = {Suppl 4}, pages = {131}, pmid = {35428201}, issn = {1471-2105}, support = {MOST108-2628-E-038-002-MY3//Ministry of Science and Technology, Taiwan/ ; MOST110-2221-E-038-019-MY3//Ministry of Science and Technology, Taiwan/ ; }, mesh = {*Anti-Bacterial Agents/pharmacology ; *Drug Resistance, Bacterial/genetics ; Genome, Bacterial ; Machine Learning ; Whole Genome Sequencing/methods ; }, abstract = {BACKGROUND: Predicting which pathogens might exhibit antimicrobial resistance (AMR) based on genomics data is one of the promising ways to swiftly and precisely identify AMR pathogens. Currently, the most widely used genomics approach is through identifying known AMR genes from genomic information in order to predict whether a pathogen might be resistant to certain antibiotic drugs. The list of known AMR genes, however, is still far from comprehensive and may result in inaccurate AMR pathogen predictions. We thus felt the need to expand the AMR gene set and proposed a pan-genome-based feature selection method to identify potential gene sets for AMR prediction purposes.

RESULTS: By building pan-genome datasets and extracting gene presence/absence patterns from four bacterial species, each with more than 2000 strains, we showed that machine learning models built from pan-genome data can be very promising for predicting AMR pathogens. The gene set selected by the eXtreme Gradient Boosting (XGBoost) feature selection approach further improved prediction outcomes, and an incremental approach selecting subsets of XGBoost-selected features brought the machine learning model performance to the next level. Investigating selected gene sets revealed that on average about 50% of genes had no known function and very few of them were known AMR genes, indicating the potential of the selected gene sets to expand resistance gene repertoires.

CONCLUSIONS: We demonstrated that a pan-genome-based feature selection approach is suitable for building machine learning models for predicting AMR pathogens. The extracted gene sets may provide future clues to expand our knowledge of known AMR genes and provide novel hypotheses for inferring bacterial AMR mechanisms.}, } @article {pmid35419298, year = {2022}, author = {Akwani, WC and van Vliet, AHM and Joel, JO and Andres, S and Diricks, M and Maurer, FP and Chambers, MA and Hingley-Wilson, SM}, title = {The Use of Comparative Genomic Analysis for the Development of Subspecies-Specific PCR Assays for Mycobacterium abscessus.}, journal = {Frontiers in cellular and infection microbiology}, volume = {12}, number = {}, pages = {816615}, pmid = {35419298}, issn = {2235-2988}, support = {MC_PC_19052/MRC_/Medical Research Council/United Kingdom ; }, mesh = {Anti-Bacterial Agents ; Genomics ; Humans ; Multiplex Polymerase Chain Reaction ; *Mycobacterium/genetics ; *Mycobacterium Infections, Nontuberculous/diagnosis/microbiology ; *Mycobacterium abscessus/genetics ; }, abstract = {Mycobacterium abscessus complex (MABC) is an important pathogen of immunocompromised patients. Accurate and rapid determination of MABC at the subspecies level is vital for optimal antibiotic therapy. Here we have used comparative genomics to design MABC subspecies-specific PCR assays. Analysis of single nucleotide polymorphisms and core genome multilocus sequence typing showed clustering of genomes into three distinct clusters representing the MABC subspecies M. abscessus, M. bolletii and M. massiliense. Pangenome analysis of 318 MABC genomes from the three subspecies allowed for the identification of 15 MABC subspecies-specific genes. In silico testing of primer sets against 1,663 publicly available MABC genomes and 66 other closely related Mycobacterium genomes showed that all assays had >97% sensitivity and >98% specificity. 
Subsequent experimental validation of two subspecies-specific genes each showed the PCR assays worked well in individual and multiplex format with no false-positivity with 5 other mycobacteria of clinical importance. In conclusion, we have developed a rapid, accurate, multiplex PCR-assay for discriminating MABC subspecies that could improve their detection, diagnosis and inform correct treatment choice.}, } @article {pmid35418954, year = {2022}, author = {Wambui, J and Stevens, MJA and Cernela, N and Stephan, R}, title = {Unraveling the Genotypic and Phenotypic Diversity of the Psychrophilic Clostridium estertheticum Complex, a Meat Spoilage Agent.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {856810}, pmid = {35418954}, issn = {1664-302X}, abstract = {The spoilage of vacuum-packed meat by Clostridium estertheticum complex (CEC), which is accompanied by or without production of copious amounts of gas, has been linked to the acetone-butyrate-ethanol fermentation, but the mechanism behind the variable gas production has not been fully elucidated. The reconstruction and comparison of intra- and interspecies metabolic pathways linked to meat spoilage at the genomic level can unravel the genetic basis for the variable phenotype. However, this is hindered by unavailability of CEC genomes, which in addition, has hampered the determination of genetic diversity and its drivers within CEC. Therefore, the current study aimed at determining the diversity of CEC through comprehensive comparative genomics. Fifty CEC genomes from 11 CEC species were compared. Recombination and gene gain/loss events were identified as important sources of natural variation within CEC, with the latter being pronounced in genomospecies2 that has lost genes related to flagellar assembly and signaling. Pan-genome analysis revealed variations in carbohydrate metabolic and hydrogenases genes within the complex. Variable inter- and intraspecies gas production in meat by C. 
estertheticum and Clostridium tagluense were associated with the distribution of the [NiFe]-hydrogenase hyp gene cluster whose absence or presence was associated with occurrence or lack of pack distention, respectively. Through comparative genomics, we have shown CEC species exhibit high genetic diversity that can be partly attributed to recombination and gene gain/loss events. We have also shown genetic basis for variable gas production in meat can be attributed to the presence/absence of the hyp gene cluster.}, } @article {pmid35416699, year = {2022}, author = {Weisberg, AJ and Rahman, A and Backus, D and Tyavanagimatt, P and Chang, JH and Sachs, JL}, title = {Pangenome Evolution Reconciles Robustness and Instability of Rhizobial Symbiosis.}, journal = {mBio}, volume = {13}, number = {3}, pages = {e0007422}, pmid = {35416699}, issn = {2150-7511}, mesh = {Bacteria/metabolism ; *Bradyrhizobium/genetics/metabolism ; Ecosystem ; *Fabaceae/microbiology ; Nitrogen/metabolism ; Nitrogen Fixation ; *Rhizobium/genetics/metabolism ; Symbiosis/genetics ; }, abstract = {Root nodulating rhizobia are nearly ubiquitous in soils and provide the critical service of nitrogen fixation to thousands of legume species, including staple crops. However, the magnitude of fixed nitrogen provided to hosts varies markedly among rhizobia strains, despite host legumes having mechanisms to selectively reward beneficial strains and to punish ones that do not fix sufficient nitrogen. Variation in the services of microbial mutualists is considered paradoxical given host mechanisms to select beneficial genotypes. Moreover, the recurrent evolution of non-fixing symbiont genotypes is predicted to destabilize symbiosis, but breakdown has rarely been observed. Here, we deconstructed hundreds of genome sequences from genotypically and phenotypically diverse Bradyrhizobium strains and revealed mechanisms that generate variation in symbiotic nitrogen fixation. 
We show that this trait is conferred by a modular system consisting of many extremely large integrative conjugative elements and few conjugative plasmids. Their transmissibility and propensity to reshuffle genes generate new combinations that lead to uncooperative genotypes and make individual partnerships unstable. We also demonstrate that these same properties extend beneficial associations to diverse host species and transfer symbiotic capacity among diverse strains. Hence, symbiotic nitrogen fixation is underpinned by modularity, which engenders flexibility, a feature that reconciles evolutionary robustness and instability. These results provide new insights into mechanisms driving the evolution of mobile genetic elements. Moreover, they yield a new predictive model on the evolution of rhizobial symbioses, one that informs on the health of organisms and ecosystems that are hosts to symbionts and that helps resolve the long-standing paradox. IMPORTANCE Genetic variation is fundamental to evolution yet is paradoxical in symbiosis. Symbionts exhibit extensive variation in the magnitude of services they provide despite hosts having mechanisms to select and increase the abundance of beneficial genotypes. Additionally, evolution of uncooperative symbiont genotypes is predicted to destabilize symbiosis, but breakdown has rarely been observed. We analyzed genome sequences of Bradyrhizobium, bacteria that in symbioses with legume hosts, fix nitrogen, a nutrient essential for ecosystems. We show that genes for symbiotic nitrogen fixation are within elements that can move between bacteria and reshuffle gene combinations that change host range and quality of symbiosis services. Consequently, nitrogen fixation is evolutionarily unstable for individual partnerships, but is evolutionarily stable for legume-Bradyrhizobium symbioses in general. 
We developed a holistic model of symbiosis evolution that reconciles robustness and instability of symbiosis and informs on applications of rhizobia in agricultural settings.}, } @article {pmid35410384, year = {2022}, author = {Ebler, J and Ebert, P and Clarke, WE and Rausch, T and Audano, PA and Houwaart, T and Mao, Y and Korbel, JO and Eichler, EE and Zody, MC and Dilthey, AT and Marschall, T}, title = {Pangenome-based genome inference allows efficient and accurate genotyping across a wide spectrum of variant classes.}, journal = {Nature genetics}, volume = {54}, number = {4}, pages = {518-525}, pmid = {35410384}, issn = {1546-1718}, support = {U01 HG010973/HG/NHGRI NIH HHS/United States ; R01 HG010169/HG/NHGRI NIH HHS/United States ; U24 HG007497/HG/NHGRI NIH HHS/United States ; R01 HG002385/HG/NHGRI NIH HHS/United States ; /HHMI/Howard Hughes Medical Institute/United States ; }, mesh = {Algorithms ; *Genetic Variation ; *Genome, Human/genetics ; Genome-Wide Association Study ; *Genomics/methods ; Genotype ; High-Throughput Nucleotide Sequencing ; Humans ; Sequence Analysis, DNA ; }, abstract = {Typical genotyping workflows map reads to a reference genome before identifying genetic variants. Generating such alignments introduces reference biases and comes with substantial computational burden. Furthermore, short-read lengths limit the ability to characterize repetitive genomic regions, which are particularly challenging for fast k-mer-based genotypers. In the present study, we propose a new algorithm, PanGenie, that leverages a haplotype-resolved pangenome reference together with k-mer counts from short-read sequencing data to genotype a wide spectrum of genetic variation—a process we refer to as genome inference. Compared with mapping-based approaches, PanGenie is more than 4 times faster at 30-fold coverage and achieves better genotype concordances for almost all variant types and coverages tested. 
Improvements are especially pronounced for large insertions (≥50 bp) and variants in repetitive regions, enabling the inclusion of these classes of variants in genome-wide association studies. PanGenie efficiently leverages the increasing amount of haplotype-resolved assemblies to unravel the functional impact of previously inaccessible variants while being faster compared with alignment-based workflows.}, } @article {pmid35404220, year = {2022}, author = {Patrick, S}, title = {A tale of two habitats: Bacteroides fragilis, a lethal pathogen and resident in the human gastrointestinal microbiome.}, journal = {Microbiology (Reading, England)}, volume = {168}, number = {4}, pages = {}, doi = {10.1099/mic.0.001156}, pmid = {35404220}, issn = {1465-2080}, mesh = {*Bacterial Infections ; Bacteroides fragilis/genetics/metabolism ; *Gastrointestinal Microbiome ; Gastrointestinal Tract/microbiology ; Humans ; *Microbiota/genetics ; Polysaccharides/metabolism ; }, abstract = {Bacteroides fragilis is an obligately anaerobic Gram-negative bacterium and a major colonizer of the human large colon where Bacteroides is a predominant genus. During the growth of an individual clonal population, an astonishing number of reversible DNA inversion events occur, driving within-strain diversity. Additionally, the B. fragilis pan-genome contains a large pool of diverse polysaccharide biosynthesis loci, DNA restriction/modification systems and polysaccharide utilization loci, which generates remarkable between-strain diversity. Diversity clearly contributes to the success of B. fragilis within its normal habitat of the gastrointestinal (GI) tract and during infection in the extra-intestinal host environment. Within the GI tract, B. fragilis is usually symbiotic, for example providing localized nutrients for the gut epithelium, but B. fragilis within the GI tract may not always be benign. Metalloprotease toxin production is strongly associated with colorectal cancer. B. 
fragilis is unique amongst bacteria; some strains export a protein >99 % structurally similar to human ubiquitin and antigenically cross-reactive, which suggests a link to autoimmune diseases. B. fragilis is not a primary invasive enteric pathogen; however, if colonic contents contaminate the extra-intestinal host environment, it successfully adapts to this new habitat and causes infection; classically peritoneal infection arising from rupture of an inflamed appendix or GI surgery, which if untreated, can progress to bacteraemia and death. In this review selected aspects of B. fragilis adaptation to the different habitats of the GI tract and the extra-intestinal host environment are considered, along with the considerable challenges faced when studying this highly variable bacterium.}, } @article {pmid35404110, year = {2022}, author = {Baker, JL and Tang, X and LaBonte, S and Uranga, C and Edlund, A}, title = {mucG, mucH, and mucI Modulate Production of Mutanocyclin and Reutericyclins in Streptococcus mutans B04Sm5.}, journal = {Journal of bacteriology}, volume = {204}, number = {5}, pages = {e0004222}, pmid = {35404110}, issn = {1098-5530}, support = {F32 DE026947/DE/NIDCR NIH HHS/United States ; K99 DE029228/DE/NIDCR NIH HHS/United States ; R21 DE028609/DE/NIDCR NIH HHS/United States ; R00 DE024543/DE/NIDCR NIH HHS/United States ; }, mesh = {Biofilms ; *Dental Caries ; Humans ; Phylogeny ; *Streptococcus mutans/metabolism ; Tenuazonic Acid/analogs & derivatives/metabolism ; }, abstract = {Streptococcus mutans is considered a primary etiologic agent of dental caries, which is the most common chronic infectious disease worldwide. S. mutans B04Sm5 was recently shown to produce reutericyclins and mutanocyclin through the muc biosynthetic gene cluster and to utilize reutericyclins to inhibit the growth of neighboring commensal streptococci. In this study, examination of S. mutans and muc phylogeny suggested evolution of an ancestral S. 
mutans muc into three lineages within one S. mutans clade and then horizontal transfer of muc to other S. mutans clades. The roles of the mucG and mucH transcriptional regulators and the mucI transporter were also examined. mucH was demonstrated to encode a transcriptional activator of muc. mucH deletion reduced production of mutanocyclin and reutericyclins and eliminated the impaired growth and inhibition of neighboring streptococci phenotypes, which are associated with reutericyclin production. ΔmucG had increased mutanocyclin and reutericyclin production, which impaired growth and increased the ability to inhibit neighboring streptococci. However, deletion of mucG also caused reduced expression of mucD, mucE, and mucI. Deletion of mucI reduced mutanocyclin and reutericyclin production but enhanced growth, suggesting that mucI may not transport reutericyclin as its homolog does in Limosilactobacillus reuteri. Further research is needed to determine the roles of mucG and mucI and to identify any cofactors affecting the activity of the mucG and mucH regulators. Overall, this study provided pangenome and phylogenetic analyses that serve as a resource for S. mutans research and began elucidation of the regulation of reutericyclins and mutanocyclin production in S. mutans. IMPORTANCE S. mutans must be able to outcompete neighboring organisms in its ecological niche in order to cause dental caries. S. mutans B04Sm5 inhibited the growth of neighboring commensal streptococci through production of reutericyclins via the muc biosynthetic gene cluster. In this study, an S. mutans pangenome database and updated phylogenetic tree were generated that will serve as valuable resources for the S. mutans research community and that provide insights into the carriage and evolution of S. mutans muc. The MucG and MucH regulators, and the MucI transporter, were shown to modulate production of reutericyclins and mutanocyclin. These genes also affected the ability of S. 
mutans to inhibit neighboring commensals, suggesting that they may play a role in S. mutans virulence.}, } @article {pmid35403388, year = {2022}, author = {Pan, W and Cheng, Z and Han, Z and Yang, H and Zhang, W and Zhang, H}, title = {Efficient genetic transformation and CRISPR/Cas9-mediated genome editing of watermelon assisted by genes encoding developmental regulators.}, journal = {Journal of Zhejiang University. Science. B}, volume = {23}, number = {4}, pages = {339-344}, pmid = {35403388}, issn = {1862-1783}, support = {ZR202103010168//the Excellent Youth Foundation of Shandong Scientific Committee/ ; 2021T140017//the Shandong Science and Technology Innovation Funds, and the China Postdoctoral Science Foundation/ ; }, mesh = {CRISPR-Cas Systems ; *Citrullus/genetics ; *Cucurbitaceae/genetics ; Gene Editing ; Plant Breeding ; Transformation, Genetic ; }, abstract = {Cucurbitaceae is an important family of flowering plants containing multiple species of important food plants, such as melons, cucumbers, squashes, and pumpkins. However, a highly efficient genetic transformation system has not been established for most of these species (Nanasato and Tabei, 2020). Watermelon (Citrullus lanatus), an economically important and globally cultivated fruit crop, is a model species for fruit quality research due to its rich diversity of fruit size, shape, flavor, aroma, texture, peel and flesh color, and nutritional composition (Guo et al., 2019). Through pan-genome sequencing, many candidate loci associated with fruit quality traits have been identified (Guo et al., 2019). However, few of these loci have been validated. The major barrier is the low transformation efficiency of the species, with only few successful cases of genetic transformation reported so far (Tian et al., 2017; Feng et al., 2021; Wang JF et al., 2021; Wang YP et al., 2021). For example, Tian et al. 
(2017) obtained only 16 transgenic lines from about 960 cotyledon fragments, yielding a transformation efficiency of 1.67%. Therefore, efficient genetic transformation could not only facilitate the functional genomic studies in watermelon as well as other horticultural species, but also speed up the transgenic and genome-editing breeding.}, } @article {pmid35401600, year = {2022}, author = {Sun, Y and Wang, J and Li, Y and Jiang, B and Wang, X and Xu, WH and Wang, YQ and Zhang, PT and Zhang, YJ and Kong, XD}, title = {Pan-Genome Analysis Reveals the Abundant Gene Presence/Absence Variations Among Different Varieties of Melon and Their Influence on Traits.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {835496}, pmid = {35401600}, issn = {1664-462X}, abstract = {Melon (Cucumis melo L.) is an important vegetable crop that has been subjected to domestication and improvement. Several varieties of melons with diverse phenotypes have been produced. In this study, we constructed a melon pan-genome based on 297 accessions comprising 168 Mb novel sequences and 4,325 novel genes. Based on the results, there were abundant genetic variations among different melon groups, including 364 unfavorable genes in the IMP_A vs. LDR_A group, 46 favorable genes, and 295 unfavorable genes in the IMP_M vs. LDR_M group. The distribution of 709 resistance gene analogs (RGAs) was also characterized across 297 melon lines, of which 603 were core genes. Further, 106 genes were found to be variable, 55 of which were absent in the reference melon genome. 
Using gene presence/absence variation (PAV)-based genome-wide association analysis (GWAS), 13 gene PAVs associated with fruit length, fruit shape, and fruit width were identified, four of which were located in pan-genome additional contigs.}, } @article {pmid35401459, year = {2022}, author = {Kaushik, A and Roberts, DP and Ramaprasad, A and Mfarrej, S and Nair, M and Lakshman, DK and Pain, A}, title = {Pangenome Analysis of the Soilborne Fungal Phytopathogen Rhizoctonia solani and Development of a Comprehensive Web Resource: RsolaniDB.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {839524}, pmid = {35401459}, issn = {1664-302X}, abstract = {Rhizoctonia solani is a collective group of genetically and pathologically diverse basidiomycetous fungi that damage economically important crops. Its isolates are classified into 13 Anastomosis Groups (AGs) and subgroups having distinctive morphology and host ranges. The genetic factors driving the unique features of R. solani pathology are not well characterized due to the limited availability of its annotated genomes. Therefore, we performed genome sequencing, assembly, annotation and functional analysis of 12 R. solani isolates covering 7 AGs and select subgroups (AG1-IA; AG1-IB; AG1-IC; AG2-2IIIB; AG3-PT, isolates Rhs 1AP and the hypovirulent Rhs1A1; AG3-TB; AG4-HG-I, isolates Rs23 and R118-11; AG5; AG6; and AG8), in which six genomes are reported for the first time. Using a pangenome comparative analysis of 12 R. solani isolates and 15 other Basidiomycetes, we defined the unique and shared secretomes, CAZymes, and effectors across the AGs. We have also elucidated the R. solani-derived factors potentially involved in determining AG-specific host preference, and the attributes distinguishing them from other Basidiomycetes. Finally, we present the largest repertoire of R. solani genomes and their annotated components as a comprehensive database, viz. 
RsolaniDB, with tools for large-scale data mining, functional enrichment and sequence analysis not available with other state-of-the-art platforms.}, } @article {pmid35396275, year = {2022}, author = {Zhang, F and Xue, H and Dong, X and Li, M and Zheng, X and Li, Z and Xu, J and Wang, W and Wei, C}, title = {Long-read sequencing of 111 rice genomes reveals significantly larger pan-genomes.}, journal = {Genome research}, volume = {32}, number = {5}, pages = {853-863}, pmid = {35396275}, issn = {1549-5469}, mesh = {Genome ; Genomics/methods ; High-Throughput Nucleotide Sequencing ; *Oryza/genetics ; Sequence Analysis, DNA ; }, abstract = {The concept of pan-genome, which is the collection of all genomes from a population, has shown a great potential in genomics study, especially for crop sciences. The rice pan-genome constructed from the second-generation sequencing (SGS) data is about 270 Mb larger than Nipponbare, the rice reference genome (NipRG), but it is still disadvantaged by incompleteness and loss of genomic contexts. The third-generation sequencing (TGS) with long reads can help to construct better pan-genomes. In this paper, we report a high-quality rice pan-genome construction method by introducing a series of new steps to deal with the long-read data, including unmapped sequence block filtering, redundancy removing, and sequence block elongating. Compared to NipRG, the long-read sequencing-based pan-genome constructed from 105 rice accessions, which contains 604 Mb novel sequences, is much more comprehensive than the one constructed from ∼3000 rice genomes sequenced with short reads. The repetitive sequences are the main components of novel sequences, which partially explain the differences between the pan-genomes based on TGS and SGS. Adding six wild rice accessions, there are about 879 Mb novel sequences and 19,000 novel genes in the rice pan-genome in total. 
In addition, we have created high-quality reference genomes for all representative rice populations, including five gapless reference genomes. This study has made significant progress in our understanding of the rice pan-genome, and this pan-genome construction method for long-read data can be applied to accelerate a broad range of genomics studies.}, } @article {pmid35395125, year = {2022}, author = {Kaashyap, M and Kaur, S and Ford, R and Edwards, D and Siddique, KHM and Varshney, RK and Mantri, N}, title = {Comprehensive transcriptomic analysis of two RIL parents with contrasting salt responsiveness identifies polyadenylated and non-polyadenylated flower lncRNAs in chickpea.}, journal = {Plant biotechnology journal}, volume = {20}, number = {7}, pages = {1402-1416}, pmid = {35395125}, issn = {1467-7652}, mesh = {*Cicer/genetics/metabolism ; Flowers/genetics/metabolism ; Gene Expression Profiling ; Gene Expression Regulation, Plant/genetics ; *RNA, Long Noncoding/genetics/metabolism ; Transcriptome/genetics ; }, abstract = {Salinity severely affects the yield of chickpea. Understanding the role of lncRNAs can shed light on chickpea salt tolerance mechanisms. However, because lncRNAs are encoded by multiple sites within the genome, their classification to reveal functional versatility at the transcriptional and the post-transcriptional levels is challenging. To address this, we deep sequenced 24 salt-challenged flower transcriptomes from two parental genotypes of a RIL population that significantly differ in salt tolerance ability. The transcriptomes for the first time included 12 polyadenylated and 12 non-polyadenylated RNA libraries to a sequencing depth of ~50 million reads. The ab initio transcriptome assembly comprised ~34 082 transcripts from three biological replicates of salt-tolerant (JG11) and salt-sensitive (ICCV2) flowers. A total of 9419 lncRNAs responding to salt stress were identified, 2345 of which were novel lncRNAs specific to chickpea. 
The expression of poly(A+) lncRNAs and naturally antisense transcribed RNAs suggest their role in post-transcriptional modification and gene silencing. Notably, 178 differentially expressed lncRNAs were induced in the tolerant genotype but repressed in the sensitive genotype. Co-expression network analysis revealed that the induced lncRNAs interacted with the FLOWERING LOCUS (FLC), chromatin remodelling and DNA methylation genes, thus inducing flowering during salt stress. Furthermore, 26 lncRNAs showed homology with reported lncRNAs such as COOLAIR, IPS1 and AT4, thus confirming the role of chickpea lncRNAs in controlling flowering time as a crucial salt tolerance mechanism in tolerant chickpea genotype. These robust set of differentially expressed lncRNAs provide a deeper insight into the regulatory mechanisms controlled by lncRNAs under salt stress.}, } @article {pmid35385921, year = {2022}, author = {Jung, H and Kim, HS and Han, G and Park, J and Seo, YS}, title = {Comparative Analyses of Four Complete Genomes in Pseudomonas amygdali Revealed Differential Adaptation to Hostile Environments and Secretion Systems.}, journal = {The plant pathology journal}, volume = {38}, number = {2}, pages = {167-174}, pmid = {35385921}, issn = {1598-2254}, support = {//National Research Foundation of Korea/ ; 2019R1A2C2006779//Ministry of Education/ ; NNIBR202202108//Nakdonggang National Institute of Biological Resources/ ; //Ministry of Environment/ ; }, abstract = {Pseudomonas amygdali is a hemibiotrophic phytopathogen that causes disease in woody and herbaceous plants. Complete genomes of four P. amygdali pathovars were comparatively analyzed to decipher the impact of genomic diversity on host colonization. The pan-genome indicated that 3,928 core genes are conserved among pathovars, while 504-1,009 are unique to specific pathovars. The unique genome contained many mobile elements and exhibited a functional distribution different from the core genome. 
Genes involved in O-antigen biosynthesis and antimicrobial peptide resistance were significantly enriched for adaptation to hostile environments. While the type III secretion system was distributed in the core genome, unique genomes revealed a different organization of secretion systems as follows: type I in pv. tabaci, type II in pv. japonicus, type IV in pv. morsprunorum, and type VI in pv. lachrymans. These findings provide genetic insight into the dynamic interactions of the bacteria with plant hosts.}, } @article {pmid35382730, year = {2022}, author = {Beier, S and Thomson, NR}, title = {Panakeia - a universal tool for bacterial pangenome analysis.}, journal = {BMC genomics}, volume = {23}, number = {1}, pages = {265}, pmid = {35382730}, issn = {1471-2164}, mesh = {*Bacteria/genetics ; *Genome, Bacterial ; }, abstract = {BACKGROUND: Development of new pan-genome analysis tools is important, as the pangenome of a microbial species has become an important method to define the diversity of a selected taxon, most commonly a species, in the last years. This enables comparison of strains from different ecological niches and can be used to define the functional potential in a bacterial population. It gives us a much better view of microbial genomics than can be gained from singular genomes which after all are just single representatives of a much more varied population.

RESULTS: We present Panakeia, a tool which strives to be easy to use and providing a detailed view of the pangenome structure which can efficiently be utilised for discovery, or further in-depth analysis, of features of interest. It analyses synteny and multiple structural patterns of the pangenome, giving insights into the biological diversity and evolution of the studied taxon. Panakeia hence provides both broad and detailed information on the structure of a pangenome, for diverse and highly clonal populations of bacteria.

CONCLUSIONS: Previously published pangenome tools often reduce the information to a presence/absence matrix of unconnected genes or generate massive hard to interpret output graphs. However, Panakeia includes synteny and structural information and presents it in a way that can readily be used for further analysis. Panakeia can be downloaded at https://github.com/BioSina/Panakeia together with a detailed User Guide.}, } @article {pmid35380461, year = {2022}, author = {Sivertsen, A and Dyrhovden, R and Tellevik, MG and Bruvold, TS and Nybakken, E and Skutlaberg, DH and Skarstein, I and Kommedal, Ø}, title = {Escherichia marmotae-a Human Pathogen Easily Misidentified as Escherichia coli.}, journal = {Microbiology spectrum}, volume = {10}, number = {2}, pages = {e0203521}, pmid = {35380461}, issn = {2165-0497}, mesh = {*Anti-Infective Agents ; Escherichia ; Escherichia coli/genetics ; *Escherichia coli Infections/diagnosis/microbiology ; Humans ; Phylogeny ; RNA, Ribosomal, 16S/genetics ; *Sepsis ; }, abstract = {We hereby present the first descriptions of human-invasive infections caused by Escherichia marmotae, a recently described species that encompasses the former "Escherichia cryptic clade V." We describe four cases, one acute sepsis of unknown origin, one postoperative sepsis after cholecystectomy, one spondylodiscitis, and one upper urinary tract infection. Cases were identified through unsystematic queries in a single clinical lab over 6 months. Through genome sequencing of the causative strains combined with available genomes from elsewhere, we demonstrate Es. marmotae to be a likely ubiquitous species containing genotypic virulence traits associated with Escherichia pathogenicity. The invasive isolates were scattered among isolates from a range of nonhuman sources in the phylogenetic analyses, thus indicating inherent virulence in multiple lineages. Pan genome analyses indicate that Es. 
marmotae has a large accessory genome and is likely to obtain ecologically advantageous traits, such as genes encoding antimicrobial resistance. Reliable identification might be possible by matrix-assisted laser desorption ionization-time of flight mass spectrometry (MALDI-TOF MS), but relevant spectra are missing in commercial databases. It can be identified through 16S rRNA gene sequencing. Escherichia marmotae could represent a relatively common human pathogen, and improved diagnostics will provide a better understanding of its clinical importance. IMPORTANCE Escherichia coli is the most common pathogen found in blood cultures and urine and among the most important pathogenic species in the realm of human health. The notion that some of these isolates are not Es. coli but rather another species within the same genus may have implications for what Es. coli constitutes. We only recently have obtained methods to separate the two species, which means that possible differences in important clinical aspects, such as antimicrobial resistance rates, virulence, and phylogenetic structure, may exist. We believe that Es. marmotae as a common pathogen is new merely because we have not looked or bothered to distinguish between the thousands of invasive Escherichia passing through microbiological laboratories each day.}, } @article {pmid35371168, year = {2022}, author = {Zhang, Z and Guo, J and Cai, X and Li, Y and Xi, X and Lin, R and Liang, J and Wang, X and Wu, J}, title = {Improved Reference Genome Annotation of Brassica rapa by Pacific Biosciences RNA Sequencing.}, journal = {Frontiers in plant science}, volume = {13}, number = {}, pages = {841618}, pmid = {35371168}, issn = {1664-462X}, abstract = {The species Brassica rapa includes several important vegetable crops. The draft reference genome of B. rapa ssp. pekinensis was completed in 2011, and it has since been updated twice. The pangenome with structural variations of 18 B. rapa accessions was published in 2021. 
Although extensive genomic analysis has been conducted on B. rapa, a comprehensive genome annotation including gene structure, alternative splicing (AS) events, and non-coding genes is still lacking. Therefore, we used the Pacific Biosciences (PacBio) single-molecular long-read technology to improve gene models and produced the annotated genome version 3.5. In total, we obtained 753,041 full-length non-chimeric (FLNC) reads and collapsed these into 92,810 non-redundant consensus isoforms, capturing 48% of the genes annotated in the B. rapa reference genome annotation v3.1. Based on the isoform data, we identified 830 novel protein-coding genes that were missed in previous genome annotations, defined the untranslated regions (UTRs) of 20,340 annotated genes and corrected 886 wrongly spliced genes. We also identified 28,564 AS events and 1,480 long non-coding RNAs (lncRNAs). We produced a relatively complete and high-quality reference transcriptome for B. rapa that can facilitate further functional genomic research.}, } @article {pmid35369469, year = {2022}, author = {Sanz, MB and De Belder, D and de Mendieta, JM and Faccone, D and Poklepovich, T and Lucero, C and Rapoport, M and Campos, J and Tuduri, E and Saavedra, MO and Van der Ploeg, C and Rogé, A and Pasteran, F and Corso, A and Rosato, AE and Gomez, SA}, title = {Carbapenemase-Producing Extraintestinal Pathogenic Escherichia coli From Argentina: Clonal Diversity and Predominance of Hyperepidemic Clones CC10 and CC131.}, journal = {Frontiers in microbiology}, volume = {13}, number = {}, pages = {830209}, pmid = {35369469}, issn = {1664-302X}, abstract = {Extraintestinal pathogenic Escherichia coli (ExPEC) causes infections outside the intestine. 
Particular ExPEC clones, such as clonal complex (CC)/sequence type (ST)131, have been known to sequentially accumulate antimicrobial resistance that starts with chromosomal mutations against fluoroquinolones, followed with the acquisition of bla CTX-M-15 and, more recently, carbapenemases. Here we aimed to investigate the distribution of global epidemic clones of carbapenemase-producing ExPEC from Argentina in representative clinical isolates recovered between July 2008 and March 2017. Carbapenemase-producing ExPEC (n = 160) were referred to the Argentinean reference laboratory. Of these, 71 were selected for genome sequencing. Phenotypic and microbiological studies confirmed the presence of carbapenemases confirmed as KPC-2 (n = 52), NDM-1 (n = 16), IMP-8 (n = 2), and VIM-1 (n = 1) producers. The isolates had been recovered mainly from urine, blood, and abdominal fluids among others, and some were from screening samples. After analyzing the virulence gene content, 76% of the isolates were considered ExPEC, although non-ExPEC isolates were also obtained from extraintestinal sites. Pan-genome phylogeny and clonal analysis showed great clonal diversity, although the first phylogroup in abundance was phylogroup A, harboring CC10 isolates, followed by phylogroup B2 with CC/ST131, mostly H30Rx, the subclone co-producing CTX-M-15. Phylogroups D, B1, C, F, and E were also detected with fewer strains. CC10 and CC/ST131 were found throughout the country. In addition, CC10 nucleated most metalloenzymes, such as NDM-1. Other relevant international clones were identified, such as CC/ST38, CC155, CC14/ST1193, and CC23. Two isolates co-produced KPC-2 and OXA-163 or OXA-439, a point mutation variant of OXA-163, and three isolates co-produced MCR-1 among other resistance genes. To conclude, in this work, we described the molecular epidemiology of carbapenemase-producing ExPEC in Argentina. 
Further studies are necessary to determine the plasmid families disseminating carbapenemases in ExPEC in this region.}, } @article {pmid35365914, year = {2022}, author = {Johnson, LA and Hug, LA}, title = {Cloacimonadota metabolisms include adaptations in engineered environments that are reflected in the evolutionary history of the phylum.}, journal = {Environmental microbiology reports}, volume = {14}, number = {4}, pages = {520-529}, doi = {10.1111/1758-2229.13061}, pmid = {35365914}, issn = {1758-2229}, mesh = {Bacteria/genetics ; Biological Evolution ; *Ecosystem ; *Metagenome ; Metagenomics ; Phylogeny ; }, abstract = {Phylum Cloacimonadota (previously Cloacimonetes, WWE1) is an understudied bacterial lineage frequently associated with engineered and wastewater systems. Cloacimonadota members were abundant and diverse in metagenomic datasets from a municipal landfill, prompting an examination of phylogenetic relationships, metabolic diversity, and pangenomic dynamics across the phylum, based on the 30 publicly available genomes and 24 new metagenome-assembled genomes (MAGs) from landfill samples. We found that Cloacimonadota have distinct evolutionary histories associated with engineered versus natural environments and identified genomic features and metabolic strategies that correlate to habitat of origin. Metabolic reconstructions for MAGs predict an anaerobic, acetogenic, and mixed fermentative and flavin-bifurcation-based anaerobic respiratory lifestyle for the majority of Cloacimonadota surveyed. Genomes from engineered ecosystems encode a suite of genes not typically found in genomes from natural environments including acetate kinase, genes for cysteine degradation to pyruvate, increased diversity of carbon utilization enzymes, and different mechanisms for generating membrane potential and ATP synthesis. 
This phylum-level examination also clarifies the distribution of functions previously observed for members of the phylum, where propionate oxidation and reverse TCA cycles are not common components of Cloacimonadota metabolism.}, } @article {pmid35357213, year = {2022}, author = {Shropshire, WC and Dinh, AQ and Earley, M and Komarow, L and Panesso, D and Rydell, K and Gómez-Villegas, SI and Miao, H and Hill, C and Chen, L and Patel, R and Fries, BC and Abbo, L and Cober, E and Revolinski, S and Luterbach, CL and Chambers, H and Fowler, VG and Bonomo, RA and Shelburne, SA and Kreiswirth, BN and van Duin, D and Hanson, BM and Arias, CA}, title = {Accessory Genomes Drive Independent Spread of Carbapenem-Resistant Klebsiella pneumoniae Clonal Groups 258 and 307 in Houston, TX.}, journal = {mBio}, volume = {13}, number = {2}, pages = {e0049722}, pmid = {35357213}, issn = {2150-7511}, support = {P01 AI152999/AI/NIAID NIH HHS/United States ; UM1 AI104681/AI/NIAID NIH HHS/United States ; R01 AI143910/AI/NIAID NIH HHS/United States ; K24 AI121296/AI/NIAID NIH HHS/United States ; K01 AI148593/AI/NIAID NIH HHS/United States ; }, mesh = {*Carbapenem-Resistant Enterobacteriaceae/genetics ; Carbapenems/pharmacology ; Humans ; *Klebsiella Infections/epidemiology ; Klebsiella pneumoniae ; Prospective Studies ; }, abstract = {Carbapenem-resistant Klebsiella pneumoniae (CRKp) is an urgent public health threat. Worldwide dissemination of CRKp has been largely attributed to clonal group (CG) 258. However, recent evidence indicates the global emergence of a CRKp CG307 lineage. Houston, TX, is the first large city in the United States with detected cocirculation of both CRKp CG307 and CG258. We sought to characterize the genomic and clinical factors contributing to the parallel endemic spread of CG258 and CG307. CRKp isolates were collected as part of the prospective, Consortium on Resistance against Carbapenems in Klebsiella and other Enterobacterales 2 (CRACKLE-2) study. 
Hybrid short-read and long-read genome assemblies were generated from 119 CRKp isolates (95 originated from Houston hospitals). A comprehensive characterization of phylogenies, gene transfer, and plasmid content with pan-genome analysis was performed on all CRKp isolates. Plasmid mating experiments were performed with CG307 and CG258 isolates of interest. Dissection of the accessory genomes suggested independent evolution and limited horizontal gene transfer between CG307 and CG258 lineages. CG307 contained a diverse repertoire of mobile genetic elements, which were shared with other non-CG258 K. pneumoniae isolates. Three unique clades of Houston CG307 isolates clustered distinctly from other global CG307 isolates, indicating potential selective adaptation of particular CG307 lineages to their respective geographical niches. CG307 strains were often isolated from the urine of hospitalized patients, likely serving as important reservoirs for genes encoding carbapenemases and extended-spectrum β-lactamases. Our findings suggest parallel cocirculation of high-risk lineages with potentially divergent evolution. IMPORTANCE The prevalence of carbapenem-resistant Klebsiella pneumoniae (CRKp) infections in nosocomial settings remains a public health challenge. High-risk clones such as clonal group 258 (CG258) are particularly concerning due to their association with blaKPC carriage, which can severely complicate antimicrobial treatments. There is a recent emergence of clonal group 307 (CG307) worldwide with little understanding of how this successful clone has been able to adapt while cocirculating with CG258. We provide the first evidence of potentially divergent evolution between CG258 and CG307 with limited sharing of adaptive genes. Houston, TX, is home to the largest medical center in the world, with a large influx of domestic and international patients. 
Thus, our unique geographical setting, where two pandemic strains of CRKp are circulating, provides an indication of how differential accessory genome content can drive stable, endemic populations of CRKp. Pan-genomic analyses such as these can reveal unique signatures of successful CRKp dissemination, such as the CG307-associated plasmid (pCG307_HTX), and provide invaluable insights into the surveillance of local carbapenem-resistant Enterobacterales (CRE) epidemiology.}, } @article {pmid35352958, year = {2022}, author = {Gan, L and Yan, C and Cui, J and Xue, G and Fu, H and Du, B and Zhao, H and Feng, J and Feng, Y and Fan, Z and Mao, P and Fu, T and Xu, Z and Du, S and Liu, S and Zhang, R and Zhang, Q and Li, N and Cui, X and Li, X and Zhou, Y and Huang, L and Yuan, J}, title = {Genetic Diversity and Pathogenic Features in Klebsiella pneumoniae Isolates from Patients with Pyogenic Liver Abscess and Pneumonia.}, journal = {Microbiology spectrum}, volume = {10}, number = {2}, pages = {e0264621}, pmid = {35352958}, issn = {2165-0497}, mesh = {Animals ; *Community-Acquired Infections ; *Cross Infection ; Genetic Variation ; Humans ; *Klebsiella Infections/epidemiology/microbiology ; Klebsiella pneumoniae/genetics ; *Liver Abscess, Pyogenic/epidemiology/microbiology ; Mice ; *Pneumonia ; Virulence Factors/genetics ; }, abstract = {While Klebsiella pneumoniae is a common cause of nosocomial and community-acquired infections, including pneumonia and pyogenic liver abscess, little is known about the population structure of this bacterium. In this study, we investigated the prevalence and molecular characteristics of K. pneumoniae isolates from carriers, pyogenic liver abscess patients, and pneumonia patients, and genomic and phenotypic assays were used to determine the differences among the isolates. A total of 232 K. pneumoniae isolates were subtyped into 74 sequence types (STs). 
The isolates from different sources had their own STs, and the predominant subtypes in liver abscess and pneumonia patients were ST23 and ST11, respectively. Pangenome analysis also distinguished three phylogroups that were consistent with the isolate sources. The isolates collected from liver abscess patients carried significantly more virulence factors, and those from pneumonia patients harbored significantly more resistance genes and replicons. Almost all isolate STs (93/97 [95.88%]) from liver abscesses strongly correlated with the virulence factor salmochelin, while most pneumonia isolate STs (52/53 [98.11%]) from pneumonia did not correlate with salmochelin. The isolates collected from liver abscesses showed higher virulence in the cytotoxicity and mouse models. These data provide genomic support for the proposal that isolates collected from carriers, liver abscess patients, and pneumonia patients have distinct genomic features. Isolates from the different sources are largely nonoverlapping, suggesting that different patients may be infected via different sources. Further studies on the pathogenic mechanisms of salmochelin and other virulence factors will be required. IMPORTANCE While Klebsiella pneumoniae is a common cause of nosocomial and community-acquired infections, including pneumonia and pyogenic liver abscess, little is known about the population structure of this bacterium. We collected 232 isolates from carriers, pyogenic liver abscess patients, and pneumonia patients, and the isolates from different sources had their own sequence types. Pangenome analysis also distinguished three phylogroups that were consistent with the isolate sources. The isolates collected from liver abscess patients carried significantly more virulence factors, and those from pneumonia patients harbored significantly more resistance genes and replicons. Besides, there was a strong link between salmochelin and liver abscess. 
The isolates collected from liver abscesses also showed higher virulence in the cytotoxicity and mouse models. Isolates collected from different sources have distinct genomic features, suggesting that different patients may be infected via different sources.}, }