{ "cells": [ { "cell_type": "markdown", "id": "3ee5cf2b-f2d5-4a3f-908f-10a39cfcc4cd", "metadata": {}, "source": [ "# Pandas\n", "\n", "In this notebook, we’re going to show how to:\n", "* create data frames\n", "* inspect data frames" ] }, { "cell_type": "code", "execution_count": 1, "id": "6250ca82-6708-4f07-948f-c90ef1495f84", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "id": "62424fc8", "metadata": {}, "source": [ "## Create a dataframe with random data" ] }, { "cell_type": "code", "execution_count": 2, "id": "7d78f4fd-83eb-4815-a1b3-d4b9522c9519", "metadata": {}, "outputs": [], "source": [ "from numpy.random import default_rng\n", "rng = default_rng(42)" ] }, { "cell_type": "code", "execution_count": 3, "id": "98788698-8af6-42ce-8a57-e04dae0441f0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(['A', 'B', 'C', 'D', 'E', 'F', 'G'], ['0', '1', '2', '3'])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "index_list = list('ABCDEFG')\n", "column_list= list('0123')\n", "\n", "n_rows, n_cols = len(index_list), len(column_list)\n", "index_list, column_list" ] }, { "cell_type": "code", "execution_count": 4, "id": "da06c825-ea6c-46d8-a1c1-f13d6d74675a", "metadata": {}, "outputs": [], "source": [ "data = rng.normal(size=(n_rows, n_cols))" ] }, { "cell_type": "code", "execution_count": 5, "id": "ff491935-e2de-45ff-b89a-85c18bef8092", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
A0.304717-1.0399840.7504510.940565
B-1.951035-1.3021800.127840-0.316243
C-0.016801-0.8530440.8793980.777792
D0.0660311.1272410.467509-0.859292
E0.368751-0.9588830.878450-0.049926
F-0.184862-0.6809301.222541-0.154529
G-0.428328-0.3521340.5323090.365444
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "A 0.304717 -1.039984 0.750451 0.940565\n", "B -1.951035 -1.302180 0.127840 -0.316243\n", "C -0.016801 -0.853044 0.879398 0.777792\n", "D 0.066031 1.127241 0.467509 -0.859292\n", "E 0.368751 -0.958883 0.878450 -0.049926\n", "F -0.184862 -0.680930 1.222541 -0.154529\n", "G -0.428328 -0.352134 0.532309 0.365444" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(data=data, index=index_list, columns=column_list)\n", "df" ] }, { "cell_type": "markdown", "id": "5ee86614-db29-43bd-8344-2b6465fdab3a", "metadata": {}, "source": [ "### Another way to get lists of letters and digits to use as index/columns" ] }, { "cell_type": "code", "execution_count": 6, "id": "f5d36527-619f-48ad-bba4-e311873c8143", "metadata": {}, "outputs": [], "source": [ "import string\n", "\n", "index_list = list(string.ascii_uppercase)\n", "column_list = list(string.digits)\n", "\n", "n_rows, n_cols = len(index_list), len(column_list)" ] }, { "cell_type": "markdown", "id": "d6b55365-0d3b-4aea-a438-7886d5654636", "metadata": {}, "source": [ "See the [string docs](https://docs.python.org/3/library/string.html) for more information on the `string` module." ] }, { "cell_type": "code", "execution_count": 7, "id": "75f28162-5bcb-4645-a073-9463385b8b9d", "metadata": {}, "outputs": [], "source": [ "data = rng.normal(size=(n_rows, n_cols))" ] }, { "cell_type": "code", "execution_count": 8, "id": "377e2646-d390-4e96-8caa-75327a257bc2", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
A0.4127330.4308212.141648-0.406415-0.512243-0.8137730.6159791.128972-0.113947-0.840156
B-0.8244810.6505930.7432540.543154-0.6655100.2321610.1166860.2186890.8714290.223596
C0.6789140.0675790.2891190.631288-1.457156-0.319671-0.470373-0.638878-0.2751421.494941
D-0.8658310.968278-1.682870-0.3348850.1627530.5862220.7112270.793347-0.348725-0.462352
E0.857976-0.191304-1.275686-1.133287-0.9194520.4971610.1424260.690485-0.4272530.158540
F0.625590-0.3093470.456775-0.661926-0.363054-0.381738-1.1958400.486972-0.4694020.012494
G0.4807470.4465310.665385-0.098485-0.423298-0.079718-1.687334-1.447112-1.322700-0.997247
H0.399774-0.905479-0.3781631.299228-0.3562640.737516-0.933618-0.205438-0.950022-0.339033
I0.840308-1.7273200.4344240.237736-0.594150-1.4460580.072130-0.5294930.2326760.021852
J1.601779-0.239356-1.0234970.1792760.2199971.3591880.8351110.3568711.463303-1.188763
K-0.639752-0.926576-0.389810-1.3766860.635151-0.222223-1.470806-1.0155790.3135140.838127
L1.9967312.9138620.414409-0.989538-2.1320460.267711-0.812941-0.415357-0.612097-0.140791
M1.0659800.157049-0.158635-1.035654-1.674683-0.486308-0.0537831.7679300.1302750.982740
N-0.499296-1.184944-0.965117-0.7252262.128470-0.8213870.838489-0.9029270.9315730.384951
O-0.156638-0.040763-0.6547880.446072-0.454983-1.225606-1.2779380.1725881.5790910.159992
P-0.1186380.2858261.3060020.219383-0.4109271.1062890.4287561.5357560.183234-1.224469
Q-1.3681591.6509281.723666-0.179519-0.3831871.461444-1.107046-0.8947270.643327-0.394605
R-0.005122-0.1634430.3375751.4074820.0905850.643939-2.050172-0.048718-0.843230-1.218813
S-0.878152-0.3341230.915903-1.3263930.030631-0.484169-0.3276731.0027580.5381151.337398
T-0.154506-0.695943-0.2238590.2424970.176573-1.0843880.0904900.2282282.5174741.876845
U-0.853243-0.287383-1.463442-0.5907070.3156051.205854-0.729084-0.654146-2.147289-0.162666
V-1.062414-0.529439-0.876861-0.094263-1.757728-1.4670452.129247-1.287423-1.0967861.836914
W2.905067-1.171567-0.3682490.3415561.728698-0.986857-0.2452780.7773380.434766-0.376156
X-0.133823-1.374896-0.238174-0.2663870.232170-0.5553270.4715391.0127160.1554290.351756
Y0.0531550.000084-0.7215580.316494-0.0972872.0931681.5733550.385847-0.763057-1.112411
Z1.1911430.2627490.480143-1.7445860.9274380.454420-1.110431-0.4715250.2637170.052467
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "A 0.412733 0.430821 2.141648 -0.406415 -0.512243 -0.813773 0.615979 \n", "B -0.824481 0.650593 0.743254 0.543154 -0.665510 0.232161 0.116686 \n", "C 0.678914 0.067579 0.289119 0.631288 -1.457156 -0.319671 -0.470373 \n", "D -0.865831 0.968278 -1.682870 -0.334885 0.162753 0.586222 0.711227 \n", "E 0.857976 -0.191304 -1.275686 -1.133287 -0.919452 0.497161 0.142426 \n", "F 0.625590 -0.309347 0.456775 -0.661926 -0.363054 -0.381738 -1.195840 \n", "G 0.480747 0.446531 0.665385 -0.098485 -0.423298 -0.079718 -1.687334 \n", "H 0.399774 -0.905479 -0.378163 1.299228 -0.356264 0.737516 -0.933618 \n", "I 0.840308 -1.727320 0.434424 0.237736 -0.594150 -1.446058 0.072130 \n", "J 1.601779 -0.239356 -1.023497 0.179276 0.219997 1.359188 0.835111 \n", "K -0.639752 -0.926576 -0.389810 -1.376686 0.635151 -0.222223 -1.470806 \n", "L 1.996731 2.913862 0.414409 -0.989538 -2.132046 0.267711 -0.812941 \n", "M 1.065980 0.157049 -0.158635 -1.035654 -1.674683 -0.486308 -0.053783 \n", "N -0.499296 -1.184944 -0.965117 -0.725226 2.128470 -0.821387 0.838489 \n", "O -0.156638 -0.040763 -0.654788 0.446072 -0.454983 -1.225606 -1.277938 \n", "P -0.118638 0.285826 1.306002 0.219383 -0.410927 1.106289 0.428756 \n", "Q -1.368159 1.650928 1.723666 -0.179519 -0.383187 1.461444 -1.107046 \n", "R -0.005122 -0.163443 0.337575 1.407482 0.090585 0.643939 -2.050172 \n", "S -0.878152 -0.334123 0.915903 -1.326393 0.030631 -0.484169 -0.327673 \n", "T -0.154506 -0.695943 -0.223859 0.242497 0.176573 -1.084388 0.090490 \n", "U -0.853243 -0.287383 -1.463442 -0.590707 0.315605 1.205854 -0.729084 \n", "V -1.062414 -0.529439 -0.876861 -0.094263 -1.757728 -1.467045 2.129247 \n", "W 2.905067 -1.171567 -0.368249 0.341556 1.728698 -0.986857 -0.245278 \n", "X -0.133823 -1.374896 -0.238174 -0.266387 0.232170 -0.555327 0.471539 \n", "Y 0.053155 0.000084 -0.721558 0.316494 -0.097287 2.093168 1.573355 \n", "Z 1.191143 0.262749 0.480143 -1.744586 0.927438 0.454420 -1.110431 \n", "\n", " 7 
8 9 \n", "A 1.128972 -0.113947 -0.840156 \n", "B 0.218689 0.871429 0.223596 \n", "C -0.638878 -0.275142 1.494941 \n", "D 0.793347 -0.348725 -0.462352 \n", "E 0.690485 -0.427253 0.158540 \n", "F 0.486972 -0.469402 0.012494 \n", "G -1.447112 -1.322700 -0.997247 \n", "H -0.205438 -0.950022 -0.339033 \n", "I -0.529493 0.232676 0.021852 \n", "J 0.356871 1.463303 -1.188763 \n", "K -1.015579 0.313514 0.838127 \n", "L -0.415357 -0.612097 -0.140791 \n", "M 1.767930 0.130275 0.982740 \n", "N -0.902927 0.931573 0.384951 \n", "O 0.172588 1.579091 0.159992 \n", "P 1.535756 0.183234 -1.224469 \n", "Q -0.894727 0.643327 -0.394605 \n", "R -0.048718 -0.843230 -1.218813 \n", "S 1.002758 0.538115 1.337398 \n", "T 0.228228 2.517474 1.876845 \n", "U -0.654146 -2.147289 -0.162666 \n", "V -1.287423 -1.096786 1.836914 \n", "W 0.777338 0.434766 -0.376156 \n", "X 1.012716 0.155429 0.351756 \n", "Y 0.385847 -0.763057 -1.112411 \n", "Z -0.471525 0.263717 0.052467 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(data=data, index=index_list, columns=column_list)\n", "df" ] }, { "cell_type": "markdown", "id": "c430c3af", "metadata": {}, "source": [ "## Inspect the dataframe" ] }, { "cell_type": "markdown", "id": "c0fba470-cbfd-4014-a3c1-80a5805ab6b2", "metadata": {}, "source": [ "### see the first/last 5 rows" ] }, { "cell_type": "code", "execution_count": 9, "id": "67c9e6cc-087e-430f-893f-5fd8cd32dcb8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
A0.4127330.4308212.141648-0.406415-0.512243-0.8137730.6159791.128972-0.113947-0.840156
B-0.8244810.6505930.7432540.543154-0.6655100.2321610.1166860.2186890.8714290.223596
C0.6789140.0675790.2891190.631288-1.457156-0.319671-0.470373-0.638878-0.2751421.494941
D-0.8658310.968278-1.682870-0.3348850.1627530.5862220.7112270.793347-0.348725-0.462352
E0.857976-0.191304-1.275686-1.133287-0.9194520.4971610.1424260.690485-0.4272530.158540
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "A 0.412733 0.430821 2.141648 -0.406415 -0.512243 -0.813773 0.615979 \n", "B -0.824481 0.650593 0.743254 0.543154 -0.665510 0.232161 0.116686 \n", "C 0.678914 0.067579 0.289119 0.631288 -1.457156 -0.319671 -0.470373 \n", "D -0.865831 0.968278 -1.682870 -0.334885 0.162753 0.586222 0.711227 \n", "E 0.857976 -0.191304 -1.275686 -1.133287 -0.919452 0.497161 0.142426 \n", "\n", " 7 8 9 \n", "A 1.128972 -0.113947 -0.840156 \n", "B 0.218689 0.871429 0.223596 \n", "C -0.638878 -0.275142 1.494941 \n", "D 0.793347 -0.348725 -0.462352 \n", "E 0.690485 -0.427253 0.158540 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 10, "id": "0478ee54-57a9-4209-8fb8-c248d64253d9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
V-1.062414-0.529439-0.876861-0.094263-1.757728-1.4670452.129247-1.287423-1.0967861.836914
W2.905067-1.171567-0.3682490.3415561.728698-0.986857-0.2452780.7773380.434766-0.376156
X-0.133823-1.374896-0.238174-0.2663870.232170-0.5553270.4715391.0127160.1554290.351756
Y0.0531550.000084-0.7215580.316494-0.0972872.0931681.5733550.385847-0.763057-1.112411
Z1.1911430.2627490.480143-1.7445860.9274380.454420-1.110431-0.4715250.2637170.052467
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "V -1.062414 -0.529439 -0.876861 -0.094263 -1.757728 -1.467045 2.129247 \n", "W 2.905067 -1.171567 -0.368249 0.341556 1.728698 -0.986857 -0.245278 \n", "X -0.133823 -1.374896 -0.238174 -0.266387 0.232170 -0.555327 0.471539 \n", "Y 0.053155 0.000084 -0.721558 0.316494 -0.097287 2.093168 1.573355 \n", "Z 1.191143 0.262749 0.480143 -1.744586 0.927438 0.454420 -1.110431 \n", "\n", " 7 8 9 \n", "V -1.287423 -1.096786 1.836914 \n", "W 0.777338 0.434766 -0.376156 \n", "X 1.012716 0.155429 0.351756 \n", "Y 0.385847 -0.763057 -1.112411 \n", "Z -0.471525 0.263717 0.052467 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tail()" ] }, { "cell_type": "markdown", "id": "09f36b39-ead5-4b39-9df3-70c4418d2317", "metadata": {}, "source": [ "### see a different number of top/bottom rows" ] }, { "cell_type": "code", "execution_count": 11, "id": "25316eb0-2fef-4385-b38c-ebda70cd7ed0", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
A0.4127330.4308212.141648-0.406415-0.512243-0.8137730.6159791.128972-0.113947-0.840156
B-0.8244810.6505930.7432540.543154-0.6655100.2321610.1166860.2186890.8714290.223596
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "A 0.412733 0.430821 2.141648 -0.406415 -0.512243 -0.813773 0.615979 \n", "B -0.824481 0.650593 0.743254 0.543154 -0.665510 0.232161 0.116686 \n", "\n", " 7 8 9 \n", "A 1.128972 -0.113947 -0.840156 \n", "B 0.218689 0.871429 0.223596 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(2)" ] }, { "cell_type": "markdown", "id": "8a686617-6ffd-4ae8-a950-bd474ad624e4", "metadata": {}, "source": [ "### see a random sample of rows" ] }, { "cell_type": "code", "execution_count": 12, "id": "20d08d61-a321-469c-9e61-24b5917bc8bc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
G0.4807470.4465310.665385-0.098485-0.423298-0.079718-1.687334-1.447112-1.322700-0.997247
I0.840308-1.7273200.4344240.237736-0.594150-1.4460580.072130-0.5294930.2326760.021852
N-0.499296-1.184944-0.965117-0.7252262.128470-0.8213870.838489-0.9029270.9315730.384951
R-0.005122-0.1634430.3375751.4074820.0905850.643939-2.050172-0.048718-0.843230-1.218813
F0.625590-0.3093470.456775-0.661926-0.363054-0.381738-1.1958400.486972-0.4694020.012494
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", "G 0.480747 0.446531 0.665385 -0.098485 -0.423298 -0.079718 -1.687334 \n", "I 0.840308 -1.727320 0.434424 0.237736 -0.594150 -1.446058 0.072130 \n", "N -0.499296 -1.184944 -0.965117 -0.725226 2.128470 -0.821387 0.838489 \n", "R -0.005122 -0.163443 0.337575 1.407482 0.090585 0.643939 -2.050172 \n", "F 0.625590 -0.309347 0.456775 -0.661926 -0.363054 -0.381738 -1.195840 \n", "\n", " 7 8 9 \n", "G -1.447112 -1.322700 -0.997247 \n", "I -0.529493 0.232676 0.021852 \n", "N -0.902927 0.931573 0.384951 \n", "R -0.048718 -0.843230 -1.218813 \n", "F 0.486972 -0.469402 0.012494 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sample(5)" ] }, { "cell_type": "markdown", "id": "f36a95d5-46c6-46cd-a167-d0ca46a4f8f8", "metadata": {}, "source": [ "### get basic information about the dataframe" ] }, { "cell_type": "code", "execution_count": 13, "id": "ebe1ac22-0ad3-45ba-93e3-0f2d99539be8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 26 entries, A to Z\n", "Data columns (total 10 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 0 26 non-null float64\n", " 1 1 26 non-null float64\n", " 2 2 26 non-null float64\n", " 3 3 26 non-null float64\n", " 4 4 26 non-null float64\n", " 5 5 26 non-null float64\n", " 6 6 26 non-null float64\n", " 7 7 26 non-null float64\n", " 8 8 26 non-null float64\n", " 9 9 26 non-null float64\n", "dtypes: float64(10)\n", "memory usage: 2.2+ KB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "markdown", "id": "e2be7115-e45c-4f8d-ad29-709a327bcaf6", "metadata": {}, "source": [ "### get summary statistics" ] }, { "cell_type": "code", "execution_count": 14, "id": "e02181ac-f543-4153-8a4e-dc06d7d1ed27", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
count26.00000026.00000026.00000026.00000026.00000026.00000026.00000026.00000026.00000026.000000
mean0.213455-0.086445-0.019708-0.196146-0.2136110.010416-0.2094950.0787370.0341640.049044
std1.0176320.9685770.9580270.7929700.9654340.9640801.0158420.8703290.9900600.914550
min-1.368159-1.727320-1.682870-1.744586-2.132046-1.467045-2.050172-1.447112-2.147289-1.224469
25%-0.604638-0.654317-0.704865-0.709401-0.573673-0.749161-1.063689-0.611532-0.576423-0.445415
50%0.024017-0.177374-0.191247-0.139002-0.359659-0.150970-0.1495300.1956380.1428520.017173
75%0.7999590.2800570.4743010.2979950.2091410.6295100.4608430.7556250.5122780.376652
max2.9050672.9138622.1416481.4074822.1284702.0931682.1292471.7679302.5174741.876845
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 \\\n", "count 26.000000 26.000000 26.000000 26.000000 26.000000 26.000000 \n", "mean 0.213455 -0.086445 -0.019708 -0.196146 -0.213611 0.010416 \n", "std 1.017632 0.968577 0.958027 0.792970 0.965434 0.964080 \n", "min -1.368159 -1.727320 -1.682870 -1.744586 -2.132046 -1.467045 \n", "25% -0.604638 -0.654317 -0.704865 -0.709401 -0.573673 -0.749161 \n", "50% 0.024017 -0.177374 -0.191247 -0.139002 -0.359659 -0.150970 \n", "75% 0.799959 0.280057 0.474301 0.297995 0.209141 0.629510 \n", "max 2.905067 2.913862 2.141648 1.407482 2.128470 2.093168 \n", "\n", " 6 7 8 9 \n", "count 26.000000 26.000000 26.000000 26.000000 \n", "mean -0.209495 0.078737 0.034164 0.049044 \n", "std 1.015842 0.870329 0.990060 0.914550 \n", "min -2.050172 -1.447112 -2.147289 -1.224469 \n", "25% -1.063689 -0.611532 -0.576423 -0.445415 \n", "50% -0.149530 0.195638 0.142852 0.017173 \n", "75% 0.460843 0.755625 0.512278 0.376652 \n", "max 2.129247 1.767930 2.517474 1.876845 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe()" ] }, { "cell_type": "markdown", "id": "f8be8831-c48e-4d31-8aaa-cf4aec0bd779", "metadata": {}, "source": [ "### see only the datatypes by column" ] }, { "cell_type": "code", "execution_count": 15, "id": "087f0807-ac74-4e6b-9672-4bf3971bd75d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'0': dtype('float64'),\n", " '1': dtype('float64'),\n", " '2': dtype('float64'),\n", " '3': dtype('float64'),\n", " '4': dtype('float64'),\n", " '5': dtype('float64'),\n", " '6': dtype('float64'),\n", " '7': dtype('float64'),\n", " '8': dtype('float64'),\n", " '9': dtype('float64')}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dict(zip(df.columns, df.dtypes))" ] }, { "cell_type": "code", "execution_count": null, "id": "bdbfb946-e6c9-4368-9072-36a63d39d1ee", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { 
"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.2" } }, "nbformat": 4, "nbformat_minor": 5 }