{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "- I wanted to recreate Figure 1 of \"Insights from 20 years of bacterial genome sequencing\": https://link.springer.com/article/10.1007%2Fs10142-015-0433-4\n", "- I downloaded the release dates of prokaryote genomes from the NCBI genome browser: https://www.ncbi.nlm.nih.gov/genome/browse/#!/prokaryotes/\n", " - Saved to: group/jeff-law/figs/igacat/2020-01-gbcb/ann-stats/2020-01-17-prokaryotes.csv" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import seaborn as sns\n", "# make this the default for now\n", "sns.set_style('darkgrid')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | #Organism Name | \n", "Organism Groups | \n", "Size(Mb) | \n", "CDS | \n", "Release Date | \n", "GenBank FTP | \n", "RefSeq FTP | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "Campylobacter jejuni subsp. jejuni NCTC 11168 ... | \n", "Bacteria;Proteobacteria;delta/epsilon subdivis... | \n", "1.64148 | \n", "1572 | \n", "2001-09-27 | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000... | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000... | \n", "
1 | \n", "Pseudomonas fluorescens F113 | \n", "Bacteria;Proteobacteria;Gammaproteobacteria | \n", "6.84583 | \n", "5989 | \n", "2011-12-09 | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000... | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000... | \n", "
2 | \n", "Xanthomonas campestris pv. campestris str. ATC... | \n", "Bacteria;Proteobacteria;Gammaproteobacteria | \n", "5.07619 | \n", "4179 | \n", "2001-11-28 | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000... | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000... | \n", "
3 | \n", "Salmonella enterica subsp. enterica serovar Ty... | \n", "Bacteria;Proteobacteria;Gammaproteobacteria | \n", "5.13371 | \n", "4473 | \n", "2001-11-07 | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000... | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000... | \n", "
4 | \n", "Yersinia pestis CO92 | \n", "Bacteria;Proteobacteria;Gammaproteobacteria | \n", "4.82986 | \n", "3979 | \n", "2001-10-15 | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000... | \n", "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000... | \n", "