{
"cells": [
{
"cell_type": "markdown",
"id": "3ee5cf2b-f2d5-4a3f-908f-10a39cfcc4cd",
"metadata": {},
"source": [
"# Pandas\n",
"\n",
"In this notebook, we’re going to show how to:\n",
"* create data frames\n",
"* inspect data frames"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "6250ca82-6708-4f07-948f-c90ef1495f84",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "62424fc8",
"metadata": {},
"source": [
"## Create a dataframe with random data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7d78f4fd-83eb-4815-a1b3-d4b9522c9519",
"metadata": {},
"outputs": [],
"source": [
"from numpy.random import default_rng\n",
"rng = default_rng(42)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "98788698-8af6-42ce-8a57-e04dae0441f0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(['A', 'B', 'C', 'D', 'E', 'F', 'G'], ['0', '1', '2', '3'])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"index_list = list('ABCDEFG')\n",
"column_list= list('0123')\n",
"\n",
"n_rows, n_cols = len(index_list), len(column_list)\n",
"index_list, column_list"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "da06c825-ea6c-46d8-a1c1-f13d6d74675a",
"metadata": {},
"outputs": [],
"source": [
"data = rng.normal(size=(n_rows, n_cols))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "ff491935-e2de-45ff-b89a-85c18bef8092",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" 0.304717 | \n",
" -1.039984 | \n",
" 0.750451 | \n",
" 0.940565 | \n",
"
\n",
" \n",
" B | \n",
" -1.951035 | \n",
" -1.302180 | \n",
" 0.127840 | \n",
" -0.316243 | \n",
"
\n",
" \n",
" C | \n",
" -0.016801 | \n",
" -0.853044 | \n",
" 0.879398 | \n",
" 0.777792 | \n",
"
\n",
" \n",
" D | \n",
" 0.066031 | \n",
" 1.127241 | \n",
" 0.467509 | \n",
" -0.859292 | \n",
"
\n",
" \n",
" E | \n",
" 0.368751 | \n",
" -0.958883 | \n",
" 0.878450 | \n",
" -0.049926 | \n",
"
\n",
" \n",
" F | \n",
" -0.184862 | \n",
" -0.680930 | \n",
" 1.222541 | \n",
" -0.154529 | \n",
"
\n",
" \n",
" G | \n",
" -0.428328 | \n",
" -0.352134 | \n",
" 0.532309 | \n",
" 0.365444 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3\n",
"A 0.304717 -1.039984 0.750451 0.940565\n",
"B -1.951035 -1.302180 0.127840 -0.316243\n",
"C -0.016801 -0.853044 0.879398 0.777792\n",
"D 0.066031 1.127241 0.467509 -0.859292\n",
"E 0.368751 -0.958883 0.878450 -0.049926\n",
"F -0.184862 -0.680930 1.222541 -0.154529\n",
"G -0.428328 -0.352134 0.532309 0.365444"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(data=data, index=index_list, columns=column_list)\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "5ee86614-db29-43bd-8344-2b6465fdab3a",
"metadata": {},
"source": [
"### Another way to get list of letters and digits to use as index/columns "
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f5d36527-619f-48ad-bba4-e311873c8143",
"metadata": {},
"outputs": [],
"source": [
"import string\n",
"\n",
"index_list = list(string.ascii_uppercase)\n",
"column_list = list(string.digits)\n",
"\n",
"n_rows, n_cols = len(index_list), len(column_list)"
]
},
{
"cell_type": "markdown",
"id": "d6b55365-0d3b-4aea-a438-7886d5654636",
"metadata": {},
"source": [
"see [string doc](https://docs.python.org/3/library/string.html) for info on the `string` module"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "75f28162-5bcb-4645-a073-9463385b8b9d",
"metadata": {},
"outputs": [],
"source": [
"data = rng.normal(size=(n_rows, n_cols))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "377e2646-d390-4e96-8caa-75327a257bc2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" 0.412733 | \n",
" 0.430821 | \n",
" 2.141648 | \n",
" -0.406415 | \n",
" -0.512243 | \n",
" -0.813773 | \n",
" 0.615979 | \n",
" 1.128972 | \n",
" -0.113947 | \n",
" -0.840156 | \n",
"
\n",
" \n",
" B | \n",
" -0.824481 | \n",
" 0.650593 | \n",
" 0.743254 | \n",
" 0.543154 | \n",
" -0.665510 | \n",
" 0.232161 | \n",
" 0.116686 | \n",
" 0.218689 | \n",
" 0.871429 | \n",
" 0.223596 | \n",
"
\n",
" \n",
" C | \n",
" 0.678914 | \n",
" 0.067579 | \n",
" 0.289119 | \n",
" 0.631288 | \n",
" -1.457156 | \n",
" -0.319671 | \n",
" -0.470373 | \n",
" -0.638878 | \n",
" -0.275142 | \n",
" 1.494941 | \n",
"
\n",
" \n",
" D | \n",
" -0.865831 | \n",
" 0.968278 | \n",
" -1.682870 | \n",
" -0.334885 | \n",
" 0.162753 | \n",
" 0.586222 | \n",
" 0.711227 | \n",
" 0.793347 | \n",
" -0.348725 | \n",
" -0.462352 | \n",
"
\n",
" \n",
" E | \n",
" 0.857976 | \n",
" -0.191304 | \n",
" -1.275686 | \n",
" -1.133287 | \n",
" -0.919452 | \n",
" 0.497161 | \n",
" 0.142426 | \n",
" 0.690485 | \n",
" -0.427253 | \n",
" 0.158540 | \n",
"
\n",
" \n",
" F | \n",
" 0.625590 | \n",
" -0.309347 | \n",
" 0.456775 | \n",
" -0.661926 | \n",
" -0.363054 | \n",
" -0.381738 | \n",
" -1.195840 | \n",
" 0.486972 | \n",
" -0.469402 | \n",
" 0.012494 | \n",
"
\n",
" \n",
" G | \n",
" 0.480747 | \n",
" 0.446531 | \n",
" 0.665385 | \n",
" -0.098485 | \n",
" -0.423298 | \n",
" -0.079718 | \n",
" -1.687334 | \n",
" -1.447112 | \n",
" -1.322700 | \n",
" -0.997247 | \n",
"
\n",
" \n",
" H | \n",
" 0.399774 | \n",
" -0.905479 | \n",
" -0.378163 | \n",
" 1.299228 | \n",
" -0.356264 | \n",
" 0.737516 | \n",
" -0.933618 | \n",
" -0.205438 | \n",
" -0.950022 | \n",
" -0.339033 | \n",
"
\n",
" \n",
" I | \n",
" 0.840308 | \n",
" -1.727320 | \n",
" 0.434424 | \n",
" 0.237736 | \n",
" -0.594150 | \n",
" -1.446058 | \n",
" 0.072130 | \n",
" -0.529493 | \n",
" 0.232676 | \n",
" 0.021852 | \n",
"
\n",
" \n",
" J | \n",
" 1.601779 | \n",
" -0.239356 | \n",
" -1.023497 | \n",
" 0.179276 | \n",
" 0.219997 | \n",
" 1.359188 | \n",
" 0.835111 | \n",
" 0.356871 | \n",
" 1.463303 | \n",
" -1.188763 | \n",
"
\n",
" \n",
" K | \n",
" -0.639752 | \n",
" -0.926576 | \n",
" -0.389810 | \n",
" -1.376686 | \n",
" 0.635151 | \n",
" -0.222223 | \n",
" -1.470806 | \n",
" -1.015579 | \n",
" 0.313514 | \n",
" 0.838127 | \n",
"
\n",
" \n",
" L | \n",
" 1.996731 | \n",
" 2.913862 | \n",
" 0.414409 | \n",
" -0.989538 | \n",
" -2.132046 | \n",
" 0.267711 | \n",
" -0.812941 | \n",
" -0.415357 | \n",
" -0.612097 | \n",
" -0.140791 | \n",
"
\n",
" \n",
" M | \n",
" 1.065980 | \n",
" 0.157049 | \n",
" -0.158635 | \n",
" -1.035654 | \n",
" -1.674683 | \n",
" -0.486308 | \n",
" -0.053783 | \n",
" 1.767930 | \n",
" 0.130275 | \n",
" 0.982740 | \n",
"
\n",
" \n",
" N | \n",
" -0.499296 | \n",
" -1.184944 | \n",
" -0.965117 | \n",
" -0.725226 | \n",
" 2.128470 | \n",
" -0.821387 | \n",
" 0.838489 | \n",
" -0.902927 | \n",
" 0.931573 | \n",
" 0.384951 | \n",
"
\n",
" \n",
" O | \n",
" -0.156638 | \n",
" -0.040763 | \n",
" -0.654788 | \n",
" 0.446072 | \n",
" -0.454983 | \n",
" -1.225606 | \n",
" -1.277938 | \n",
" 0.172588 | \n",
" 1.579091 | \n",
" 0.159992 | \n",
"
\n",
" \n",
" P | \n",
" -0.118638 | \n",
" 0.285826 | \n",
" 1.306002 | \n",
" 0.219383 | \n",
" -0.410927 | \n",
" 1.106289 | \n",
" 0.428756 | \n",
" 1.535756 | \n",
" 0.183234 | \n",
" -1.224469 | \n",
"
\n",
" \n",
" Q | \n",
" -1.368159 | \n",
" 1.650928 | \n",
" 1.723666 | \n",
" -0.179519 | \n",
" -0.383187 | \n",
" 1.461444 | \n",
" -1.107046 | \n",
" -0.894727 | \n",
" 0.643327 | \n",
" -0.394605 | \n",
"
\n",
" \n",
" R | \n",
" -0.005122 | \n",
" -0.163443 | \n",
" 0.337575 | \n",
" 1.407482 | \n",
" 0.090585 | \n",
" 0.643939 | \n",
" -2.050172 | \n",
" -0.048718 | \n",
" -0.843230 | \n",
" -1.218813 | \n",
"
\n",
" \n",
" S | \n",
" -0.878152 | \n",
" -0.334123 | \n",
" 0.915903 | \n",
" -1.326393 | \n",
" 0.030631 | \n",
" -0.484169 | \n",
" -0.327673 | \n",
" 1.002758 | \n",
" 0.538115 | \n",
" 1.337398 | \n",
"
\n",
" \n",
" T | \n",
" -0.154506 | \n",
" -0.695943 | \n",
" -0.223859 | \n",
" 0.242497 | \n",
" 0.176573 | \n",
" -1.084388 | \n",
" 0.090490 | \n",
" 0.228228 | \n",
" 2.517474 | \n",
" 1.876845 | \n",
"
\n",
" \n",
" U | \n",
" -0.853243 | \n",
" -0.287383 | \n",
" -1.463442 | \n",
" -0.590707 | \n",
" 0.315605 | \n",
" 1.205854 | \n",
" -0.729084 | \n",
" -0.654146 | \n",
" -2.147289 | \n",
" -0.162666 | \n",
"
\n",
" \n",
" V | \n",
" -1.062414 | \n",
" -0.529439 | \n",
" -0.876861 | \n",
" -0.094263 | \n",
" -1.757728 | \n",
" -1.467045 | \n",
" 2.129247 | \n",
" -1.287423 | \n",
" -1.096786 | \n",
" 1.836914 | \n",
"
\n",
" \n",
" W | \n",
" 2.905067 | \n",
" -1.171567 | \n",
" -0.368249 | \n",
" 0.341556 | \n",
" 1.728698 | \n",
" -0.986857 | \n",
" -0.245278 | \n",
" 0.777338 | \n",
" 0.434766 | \n",
" -0.376156 | \n",
"
\n",
" \n",
" X | \n",
" -0.133823 | \n",
" -1.374896 | \n",
" -0.238174 | \n",
" -0.266387 | \n",
" 0.232170 | \n",
" -0.555327 | \n",
" 0.471539 | \n",
" 1.012716 | \n",
" 0.155429 | \n",
" 0.351756 | \n",
"
\n",
" \n",
" Y | \n",
" 0.053155 | \n",
" 0.000084 | \n",
" -0.721558 | \n",
" 0.316494 | \n",
" -0.097287 | \n",
" 2.093168 | \n",
" 1.573355 | \n",
" 0.385847 | \n",
" -0.763057 | \n",
" -1.112411 | \n",
"
\n",
" \n",
" Z | \n",
" 1.191143 | \n",
" 0.262749 | \n",
" 0.480143 | \n",
" -1.744586 | \n",
" 0.927438 | \n",
" 0.454420 | \n",
" -1.110431 | \n",
" -0.471525 | \n",
" 0.263717 | \n",
" 0.052467 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"A 0.412733 0.430821 2.141648 -0.406415 -0.512243 -0.813773 0.615979 \n",
"B -0.824481 0.650593 0.743254 0.543154 -0.665510 0.232161 0.116686 \n",
"C 0.678914 0.067579 0.289119 0.631288 -1.457156 -0.319671 -0.470373 \n",
"D -0.865831 0.968278 -1.682870 -0.334885 0.162753 0.586222 0.711227 \n",
"E 0.857976 -0.191304 -1.275686 -1.133287 -0.919452 0.497161 0.142426 \n",
"F 0.625590 -0.309347 0.456775 -0.661926 -0.363054 -0.381738 -1.195840 \n",
"G 0.480747 0.446531 0.665385 -0.098485 -0.423298 -0.079718 -1.687334 \n",
"H 0.399774 -0.905479 -0.378163 1.299228 -0.356264 0.737516 -0.933618 \n",
"I 0.840308 -1.727320 0.434424 0.237736 -0.594150 -1.446058 0.072130 \n",
"J 1.601779 -0.239356 -1.023497 0.179276 0.219997 1.359188 0.835111 \n",
"K -0.639752 -0.926576 -0.389810 -1.376686 0.635151 -0.222223 -1.470806 \n",
"L 1.996731 2.913862 0.414409 -0.989538 -2.132046 0.267711 -0.812941 \n",
"M 1.065980 0.157049 -0.158635 -1.035654 -1.674683 -0.486308 -0.053783 \n",
"N -0.499296 -1.184944 -0.965117 -0.725226 2.128470 -0.821387 0.838489 \n",
"O -0.156638 -0.040763 -0.654788 0.446072 -0.454983 -1.225606 -1.277938 \n",
"P -0.118638 0.285826 1.306002 0.219383 -0.410927 1.106289 0.428756 \n",
"Q -1.368159 1.650928 1.723666 -0.179519 -0.383187 1.461444 -1.107046 \n",
"R -0.005122 -0.163443 0.337575 1.407482 0.090585 0.643939 -2.050172 \n",
"S -0.878152 -0.334123 0.915903 -1.326393 0.030631 -0.484169 -0.327673 \n",
"T -0.154506 -0.695943 -0.223859 0.242497 0.176573 -1.084388 0.090490 \n",
"U -0.853243 -0.287383 -1.463442 -0.590707 0.315605 1.205854 -0.729084 \n",
"V -1.062414 -0.529439 -0.876861 -0.094263 -1.757728 -1.467045 2.129247 \n",
"W 2.905067 -1.171567 -0.368249 0.341556 1.728698 -0.986857 -0.245278 \n",
"X -0.133823 -1.374896 -0.238174 -0.266387 0.232170 -0.555327 0.471539 \n",
"Y 0.053155 0.000084 -0.721558 0.316494 -0.097287 2.093168 1.573355 \n",
"Z 1.191143 0.262749 0.480143 -1.744586 0.927438 0.454420 -1.110431 \n",
"\n",
" 7 8 9 \n",
"A 1.128972 -0.113947 -0.840156 \n",
"B 0.218689 0.871429 0.223596 \n",
"C -0.638878 -0.275142 1.494941 \n",
"D 0.793347 -0.348725 -0.462352 \n",
"E 0.690485 -0.427253 0.158540 \n",
"F 0.486972 -0.469402 0.012494 \n",
"G -1.447112 -1.322700 -0.997247 \n",
"H -0.205438 -0.950022 -0.339033 \n",
"I -0.529493 0.232676 0.021852 \n",
"J 0.356871 1.463303 -1.188763 \n",
"K -1.015579 0.313514 0.838127 \n",
"L -0.415357 -0.612097 -0.140791 \n",
"M 1.767930 0.130275 0.982740 \n",
"N -0.902927 0.931573 0.384951 \n",
"O 0.172588 1.579091 0.159992 \n",
"P 1.535756 0.183234 -1.224469 \n",
"Q -0.894727 0.643327 -0.394605 \n",
"R -0.048718 -0.843230 -1.218813 \n",
"S 1.002758 0.538115 1.337398 \n",
"T 0.228228 2.517474 1.876845 \n",
"U -0.654146 -2.147289 -0.162666 \n",
"V -1.287423 -1.096786 1.836914 \n",
"W 0.777338 0.434766 -0.376156 \n",
"X 1.012716 0.155429 0.351756 \n",
"Y 0.385847 -0.763057 -1.112411 \n",
"Z -0.471525 0.263717 0.052467 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(data=data, index=index_list, columns=column_list)\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "c430c3af",
"metadata": {},
"source": [
"## Inspect the dataframe"
]
},
{
"cell_type": "markdown",
"id": "c0fba470-cbfd-4014-a3c1-80a5805ab6b2",
"metadata": {},
"source": [
"### see the first/last 5 rows"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "67c9e6cc-087e-430f-893f-5fd8cd32dcb8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" 0.412733 | \n",
" 0.430821 | \n",
" 2.141648 | \n",
" -0.406415 | \n",
" -0.512243 | \n",
" -0.813773 | \n",
" 0.615979 | \n",
" 1.128972 | \n",
" -0.113947 | \n",
" -0.840156 | \n",
"
\n",
" \n",
" B | \n",
" -0.824481 | \n",
" 0.650593 | \n",
" 0.743254 | \n",
" 0.543154 | \n",
" -0.665510 | \n",
" 0.232161 | \n",
" 0.116686 | \n",
" 0.218689 | \n",
" 0.871429 | \n",
" 0.223596 | \n",
"
\n",
" \n",
" C | \n",
" 0.678914 | \n",
" 0.067579 | \n",
" 0.289119 | \n",
" 0.631288 | \n",
" -1.457156 | \n",
" -0.319671 | \n",
" -0.470373 | \n",
" -0.638878 | \n",
" -0.275142 | \n",
" 1.494941 | \n",
"
\n",
" \n",
" D | \n",
" -0.865831 | \n",
" 0.968278 | \n",
" -1.682870 | \n",
" -0.334885 | \n",
" 0.162753 | \n",
" 0.586222 | \n",
" 0.711227 | \n",
" 0.793347 | \n",
" -0.348725 | \n",
" -0.462352 | \n",
"
\n",
" \n",
" E | \n",
" 0.857976 | \n",
" -0.191304 | \n",
" -1.275686 | \n",
" -1.133287 | \n",
" -0.919452 | \n",
" 0.497161 | \n",
" 0.142426 | \n",
" 0.690485 | \n",
" -0.427253 | \n",
" 0.158540 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"A 0.412733 0.430821 2.141648 -0.406415 -0.512243 -0.813773 0.615979 \n",
"B -0.824481 0.650593 0.743254 0.543154 -0.665510 0.232161 0.116686 \n",
"C 0.678914 0.067579 0.289119 0.631288 -1.457156 -0.319671 -0.470373 \n",
"D -0.865831 0.968278 -1.682870 -0.334885 0.162753 0.586222 0.711227 \n",
"E 0.857976 -0.191304 -1.275686 -1.133287 -0.919452 0.497161 0.142426 \n",
"\n",
" 7 8 9 \n",
"A 1.128972 -0.113947 -0.840156 \n",
"B 0.218689 0.871429 0.223596 \n",
"C -0.638878 -0.275142 1.494941 \n",
"D 0.793347 -0.348725 -0.462352 \n",
"E 0.690485 -0.427253 0.158540 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "0478ee54-57a9-4209-8fb8-c248d64253d9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" V | \n",
" -1.062414 | \n",
" -0.529439 | \n",
" -0.876861 | \n",
" -0.094263 | \n",
" -1.757728 | \n",
" -1.467045 | \n",
" 2.129247 | \n",
" -1.287423 | \n",
" -1.096786 | \n",
" 1.836914 | \n",
"
\n",
" \n",
" W | \n",
" 2.905067 | \n",
" -1.171567 | \n",
" -0.368249 | \n",
" 0.341556 | \n",
" 1.728698 | \n",
" -0.986857 | \n",
" -0.245278 | \n",
" 0.777338 | \n",
" 0.434766 | \n",
" -0.376156 | \n",
"
\n",
" \n",
" X | \n",
" -0.133823 | \n",
" -1.374896 | \n",
" -0.238174 | \n",
" -0.266387 | \n",
" 0.232170 | \n",
" -0.555327 | \n",
" 0.471539 | \n",
" 1.012716 | \n",
" 0.155429 | \n",
" 0.351756 | \n",
"
\n",
" \n",
" Y | \n",
" 0.053155 | \n",
" 0.000084 | \n",
" -0.721558 | \n",
" 0.316494 | \n",
" -0.097287 | \n",
" 2.093168 | \n",
" 1.573355 | \n",
" 0.385847 | \n",
" -0.763057 | \n",
" -1.112411 | \n",
"
\n",
" \n",
" Z | \n",
" 1.191143 | \n",
" 0.262749 | \n",
" 0.480143 | \n",
" -1.744586 | \n",
" 0.927438 | \n",
" 0.454420 | \n",
" -1.110431 | \n",
" -0.471525 | \n",
" 0.263717 | \n",
" 0.052467 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"V -1.062414 -0.529439 -0.876861 -0.094263 -1.757728 -1.467045 2.129247 \n",
"W 2.905067 -1.171567 -0.368249 0.341556 1.728698 -0.986857 -0.245278 \n",
"X -0.133823 -1.374896 -0.238174 -0.266387 0.232170 -0.555327 0.471539 \n",
"Y 0.053155 0.000084 -0.721558 0.316494 -0.097287 2.093168 1.573355 \n",
"Z 1.191143 0.262749 0.480143 -1.744586 0.927438 0.454420 -1.110431 \n",
"\n",
" 7 8 9 \n",
"V -1.287423 -1.096786 1.836914 \n",
"W 0.777338 0.434766 -0.376156 \n",
"X 1.012716 0.155429 0.351756 \n",
"Y 0.385847 -0.763057 -1.112411 \n",
"Z -0.471525 0.263717 0.052467 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tail()"
]
},
{
"cell_type": "markdown",
"id": "09f36b39-ead5-4b39-9df3-70c4418d2317",
"metadata": {},
"source": [
"### see different number of top/bottom rows"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "25316eb0-2fef-4385-b38c-ebda70cd7ed0",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" A | \n",
" 0.412733 | \n",
" 0.430821 | \n",
" 2.141648 | \n",
" -0.406415 | \n",
" -0.512243 | \n",
" -0.813773 | \n",
" 0.615979 | \n",
" 1.128972 | \n",
" -0.113947 | \n",
" -0.840156 | \n",
"
\n",
" \n",
" B | \n",
" -0.824481 | \n",
" 0.650593 | \n",
" 0.743254 | \n",
" 0.543154 | \n",
" -0.665510 | \n",
" 0.232161 | \n",
" 0.116686 | \n",
" 0.218689 | \n",
" 0.871429 | \n",
" 0.223596 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"A 0.412733 0.430821 2.141648 -0.406415 -0.512243 -0.813773 0.615979 \n",
"B -0.824481 0.650593 0.743254 0.543154 -0.665510 0.232161 0.116686 \n",
"\n",
" 7 8 9 \n",
"A 1.128972 -0.113947 -0.840156 \n",
"B 0.218689 0.871429 0.223596 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(2)"
]
},
{
"cell_type": "markdown",
"id": "8a686617-6ffd-4ae8-a950-bd474ad624e4",
"metadata": {},
"source": [
"### see a random sample of rows"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "20d08d61-a321-469c-9e61-24b5917bc8bc",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" G | \n",
" 0.480747 | \n",
" 0.446531 | \n",
" 0.665385 | \n",
" -0.098485 | \n",
" -0.423298 | \n",
" -0.079718 | \n",
" -1.687334 | \n",
" -1.447112 | \n",
" -1.322700 | \n",
" -0.997247 | \n",
"
\n",
" \n",
" I | \n",
" 0.840308 | \n",
" -1.727320 | \n",
" 0.434424 | \n",
" 0.237736 | \n",
" -0.594150 | \n",
" -1.446058 | \n",
" 0.072130 | \n",
" -0.529493 | \n",
" 0.232676 | \n",
" 0.021852 | \n",
"
\n",
" \n",
" N | \n",
" -0.499296 | \n",
" -1.184944 | \n",
" -0.965117 | \n",
" -0.725226 | \n",
" 2.128470 | \n",
" -0.821387 | \n",
" 0.838489 | \n",
" -0.902927 | \n",
" 0.931573 | \n",
" 0.384951 | \n",
"
\n",
" \n",
" R | \n",
" -0.005122 | \n",
" -0.163443 | \n",
" 0.337575 | \n",
" 1.407482 | \n",
" 0.090585 | \n",
" 0.643939 | \n",
" -2.050172 | \n",
" -0.048718 | \n",
" -0.843230 | \n",
" -1.218813 | \n",
"
\n",
" \n",
" F | \n",
" 0.625590 | \n",
" -0.309347 | \n",
" 0.456775 | \n",
" -0.661926 | \n",
" -0.363054 | \n",
" -0.381738 | \n",
" -1.195840 | \n",
" 0.486972 | \n",
" -0.469402 | \n",
" 0.012494 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"G 0.480747 0.446531 0.665385 -0.098485 -0.423298 -0.079718 -1.687334 \n",
"I 0.840308 -1.727320 0.434424 0.237736 -0.594150 -1.446058 0.072130 \n",
"N -0.499296 -1.184944 -0.965117 -0.725226 2.128470 -0.821387 0.838489 \n",
"R -0.005122 -0.163443 0.337575 1.407482 0.090585 0.643939 -2.050172 \n",
"F 0.625590 -0.309347 0.456775 -0.661926 -0.363054 -0.381738 -1.195840 \n",
"\n",
" 7 8 9 \n",
"G -1.447112 -1.322700 -0.997247 \n",
"I -0.529493 0.232676 0.021852 \n",
"N -0.902927 0.931573 0.384951 \n",
"R -0.048718 -0.843230 -1.218813 \n",
"F 0.486972 -0.469402 0.012494 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sample(5)"
]
},
{
"cell_type": "markdown",
"id": "f36a95d5-46c6-46cd-a167-d0ca46a4f8f8",
"metadata": {},
"source": [
"### get basic information about the dataframe"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ebe1ac22-0ad3-45ba-93e3-0f2d99539be8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Index: 26 entries, A to Z\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 0 26 non-null float64\n",
" 1 1 26 non-null float64\n",
" 2 2 26 non-null float64\n",
" 3 3 26 non-null float64\n",
" 4 4 26 non-null float64\n",
" 5 5 26 non-null float64\n",
" 6 6 26 non-null float64\n",
" 7 7 26 non-null float64\n",
" 8 8 26 non-null float64\n",
" 9 9 26 non-null float64\n",
"dtypes: float64(10)\n",
"memory usage: 2.2+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"id": "e2be7115-e45c-4f8d-ad29-709a327bcaf6",
"metadata": {},
"source": [
"### get summary statistics"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "e02181ac-f543-4153-8a4e-dc06d7d1ed27",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 26.000000 | \n",
" 26.000000 | \n",
" 26.000000 | \n",
" 26.000000 | \n",
" 26.000000 | \n",
" 26.000000 | \n",
" 26.000000 | \n",
" 26.000000 | \n",
" 26.000000 | \n",
" 26.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 0.213455 | \n",
" -0.086445 | \n",
" -0.019708 | \n",
" -0.196146 | \n",
" -0.213611 | \n",
" 0.010416 | \n",
" -0.209495 | \n",
" 0.078737 | \n",
" 0.034164 | \n",
" 0.049044 | \n",
"
\n",
" \n",
" std | \n",
" 1.017632 | \n",
" 0.968577 | \n",
" 0.958027 | \n",
" 0.792970 | \n",
" 0.965434 | \n",
" 0.964080 | \n",
" 1.015842 | \n",
" 0.870329 | \n",
" 0.990060 | \n",
" 0.914550 | \n",
"
\n",
" \n",
" min | \n",
" -1.368159 | \n",
" -1.727320 | \n",
" -1.682870 | \n",
" -1.744586 | \n",
" -2.132046 | \n",
" -1.467045 | \n",
" -2.050172 | \n",
" -1.447112 | \n",
" -2.147289 | \n",
" -1.224469 | \n",
"
\n",
" \n",
" 25% | \n",
" -0.604638 | \n",
" -0.654317 | \n",
" -0.704865 | \n",
" -0.709401 | \n",
" -0.573673 | \n",
" -0.749161 | \n",
" -1.063689 | \n",
" -0.611532 | \n",
" -0.576423 | \n",
" -0.445415 | \n",
"
\n",
" \n",
" 50% | \n",
" 0.024017 | \n",
" -0.177374 | \n",
" -0.191247 | \n",
" -0.139002 | \n",
" -0.359659 | \n",
" -0.150970 | \n",
" -0.149530 | \n",
" 0.195638 | \n",
" 0.142852 | \n",
" 0.017173 | \n",
"
\n",
" \n",
" 75% | \n",
" 0.799959 | \n",
" 0.280057 | \n",
" 0.474301 | \n",
" 0.297995 | \n",
" 0.209141 | \n",
" 0.629510 | \n",
" 0.460843 | \n",
" 0.755625 | \n",
" 0.512278 | \n",
" 0.376652 | \n",
"
\n",
" \n",
" max | \n",
" 2.905067 | \n",
" 2.913862 | \n",
" 2.141648 | \n",
" 1.407482 | \n",
" 2.128470 | \n",
" 2.093168 | \n",
" 2.129247 | \n",
" 1.767930 | \n",
" 2.517474 | \n",
" 1.876845 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 \\\n",
"count 26.000000 26.000000 26.000000 26.000000 26.000000 26.000000 \n",
"mean 0.213455 -0.086445 -0.019708 -0.196146 -0.213611 0.010416 \n",
"std 1.017632 0.968577 0.958027 0.792970 0.965434 0.964080 \n",
"min -1.368159 -1.727320 -1.682870 -1.744586 -2.132046 -1.467045 \n",
"25% -0.604638 -0.654317 -0.704865 -0.709401 -0.573673 -0.749161 \n",
"50% 0.024017 -0.177374 -0.191247 -0.139002 -0.359659 -0.150970 \n",
"75% 0.799959 0.280057 0.474301 0.297995 0.209141 0.629510 \n",
"max 2.905067 2.913862 2.141648 1.407482 2.128470 2.093168 \n",
"\n",
" 6 7 8 9 \n",
"count 26.000000 26.000000 26.000000 26.000000 \n",
"mean -0.209495 0.078737 0.034164 0.049044 \n",
"std 1.015842 0.870329 0.990060 0.914550 \n",
"min -2.050172 -1.447112 -2.147289 -1.224469 \n",
"25% -1.063689 -0.611532 -0.576423 -0.445415 \n",
"50% -0.149530 0.195638 0.142852 0.017173 \n",
"75% 0.460843 0.755625 0.512278 0.376652 \n",
"max 2.129247 1.767930 2.517474 1.876845 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "markdown",
"id": "f8be8831-c48e-4d31-8aaa-cf4aec0bd779",
"metadata": {},
"source": [
"### see only the datatypes by column"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "087f0807-ac74-4e6b-9672-4bf3971bd75d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'0': dtype('float64'),\n",
" '1': dtype('float64'),\n",
" '2': dtype('float64'),\n",
" '3': dtype('float64'),\n",
" '4': dtype('float64'),\n",
" '5': dtype('float64'),\n",
" '6': dtype('float64'),\n",
" '7': dtype('float64'),\n",
" '8': dtype('float64'),\n",
" '9': dtype('float64')}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dict(zip(df.columns, df.dtypes))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdbfb946-e6c9-4368-9072-36a63d39d1ee",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}