In [ ]:
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In [ ]:
#!sudo apt-get install -y fonts-nanum
#!sudo fc-cache -fv
#!rm ~/.cache/matplotlib -rf
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'NanumBarunGothic'
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')
df = df.fillna(method = "ffill")
mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)
sr_gg = df_seoul.loc['경기도']
plt.style.use('ggplot')
# 그림판 객체화 후 서브 객체 만들기(그림판 쪼개기)
ax = plt.figure(figsize = (14, 5))
ax1 = ax.add_subplot(2, 1, 1)
ax2 = ax.add_subplot(2, 1, 2)
# 객체로 분류가 되었기 때문에 객체에 직접 설정을 해야 함
ax1.plot(sr_gg, 'o', markersize = 10)
ax2.plot(sr_gg, marker='o', markerfacecolor = 'green', markersize = 10,
color = 'olive', linewidth = 5, label = "서울 -> 경기")
# axe 객체 y축 범위 지정
ax1.set_ylim(50000, 800000)
ax2.set_ylim(50000, 800000)
# axe 객체 x축 눈금 라벨 회전
ax1.set_xticklabels(sr_gg.index, rotation = 90)
ax2.set_xticklabels(sr_gg.index, rotation = 90)
plt.show()
<ipython-input-3-95b3dec1ad5f>:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-3-95b3dec1ad5f>:13: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
<ipython-input-3-95b3dec1ad5f>:35: UserWarning: FixedFormatter should only be used together with FixedLocator
ax1.set_xticklabels(sr_gg.index, rotation = 90)
<ipython-input-3-95b3dec1ad5f>:36: UserWarning: FixedFormatter should only be used together with FixedLocator
ax2.set_xticklabels(sr_gg.index, rotation = 90)
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'NanumBarunGothic'
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')
df = df.fillna(method = "ffill")
mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)
sr_gg = df_seoul.loc['경기도']
plt.style.use('ggplot')
ax = plt.figure(figsize = (14, 5))
ax1 = ax.add_subplot(2, 1, 1)
ax2 = ax.add_subplot(2, 1, 2)
ax1.plot(sr_gg, 'o', markersize = 10)
ax2.plot(sr_gg, marker='o', markerfacecolor = 'green', markersize = 10,
color = 'olive', linewidth = 5, label = "서울 -> 경기")
ax1.set_ylim(50000, 800000)
ax2.set_ylim(50000, 800000)
ax1.set_xticklabels(sr_gg.index, rotation = 90)
ax2.set_xticklabels(sr_gg.index, rotation = 90)
# 차트 제목
ax1.set_title('서울 -> 경기 이동 인구수')
ax2.set_title('서울 -> 경기 이동 인구수')
# 축 제목
ax1.set_xlabel('기간')
ax1.set_xlabel('이동 인구수')
ax2.set_xlabel('기간')
ax2.set_xlabel('이동 인구수')
# 간격 조정
plt.tight_layout()
plt.show()
<ipython-input-5-a76ce8c6db36>:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-5-a76ce8c6db36>:13: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
<ipython-input-5-a76ce8c6db36>:31: UserWarning: FixedFormatter should only be used together with FixedLocator
ax1.set_xticklabels(sr_gg.index, rotation = 90)
<ipython-input-5-a76ce8c6db36>:32: UserWarning: FixedFormatter should only be used together with FixedLocator
ax2.set_xticklabels(sr_gg.index, rotation = 90)
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'NanumBarunGothic'
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')
df = df.fillna(method = "ffill")
mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)
#display(df_seoul)
# 충청북도, 전라남도, 경상북도(1970 ~ 2017)
col_years = list(map(str, range(1970,2018)))
df_3 = df_seoul.loc[['충청북도','전라남도','경상북도','강원도'], col_years]
fig = plt.figure(figsize = (20,15))
ax1 = fig.add_subplot(2,2,1)
ax2 = fig.add_subplot(2,2,2)
ax3 = fig.add_subplot(2,2,3)
ax4 = fig.add_subplot(2,2,4)
# 한 그림판에 여러 차트 그리기
ax1.plot(col_years, df_3.loc['충청북도',:], marker = 'o', markerfacecolor = 'green', markersize = 10, color = 'olive', linewidth = 2, label = '서울 >>> 충북')
ax2.plot(col_years, df_3.loc['전라남도',:], marker = 'o', markerfacecolor = 'blue', markersize = 10, color = 'olive', linewidth = 2, label = '서울 >>> 전남')
ax3.plot(col_years, df_3.loc['경상북도',:], marker = 'o', markerfacecolor = 'red', markersize = 10, color = 'olive', linewidth = 2, label = '서울 >>> 경북')
ax4.plot(col_years, df_3.loc['강원도',:], marker = 'o', markerfacecolor = 'yellow', markersize = 10, color = 'olive', linewidth = 2, label = '서울 >>> 강원')
ax1.legend(loc = 'best')
ax2.legend(loc = 'best')
ax3.legend(loc = 'best')
ax4.legend(loc = 'best')
ax1.set_title('서울 >>> 충북')
ax2.set_title('서울 >>> 전남')
ax3.set_title('서울 >>> 경북')
ax4.set_title('서울 >>> 강원')
ax1.set_xlabel('기간',size=10)
ax1.set_ylabel('이동 인구수', size=10)
ax2.set_xlabel('기간',size=10)
ax2.set_ylabel('이동 인구수', size=10)
ax3.set_xlabel('기간',size=10)
ax3.set_ylabel('이동 인구수', size=10)
ax4.set_xlabel('기간',size=10)
ax4.set_ylabel('이동 인구수', size=10)
#기울기 조정
ax1.set_xticklabels(col_years, rotation=90)
ax2.set_xticklabels(col_years, rotation=90)
ax3.set_xticklabels(col_years, rotation=90)
ax4.set_xticklabels(col_years, rotation=90)
#눈금 크기 조정
ax1.tick_params(axis = 'x', labelsize = 10)
ax1.tick_params(axis = 'y', labelsize = 10)
ax2.tick_params(axis = 'x', labelsize = 10)
ax2.tick_params(axis = 'y', labelsize = 10)
ax3.tick_params(axis = 'x', labelsize = 10)
ax3.tick_params(axis = 'y', labelsize = 10)
ax4.tick_params(axis = 'x', labelsize = 10)
ax4.tick_params(axis = 'y', labelsize = 10)
plt.show()
<ipython-input-37-de7a300a959b>:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-37-de7a300a959b>:13: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
<ipython-input-37-de7a300a959b>:57: UserWarning: FixedFormatter should only be used together with FixedLocator
ax1.set_xticklabels(col_years, rotation=90)
<ipython-input-37-de7a300a959b>:58: UserWarning: FixedFormatter should only be used together with FixedLocator
ax2.set_xticklabels(col_years, rotation=90)
<ipython-input-37-de7a300a959b>:59: UserWarning: FixedFormatter should only be used together with FixedLocator
ax3.set_xticklabels(col_years, rotation=90)
<ipython-input-37-de7a300a959b>:60: UserWarning: FixedFormatter should only be used together with FixedLocator
ax4.set_xticklabels(col_years, rotation=90)
면적그래프¶
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'NanumBarunGothic'
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')
df = df.fillna(method = "ffill")
mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)
#display(df_seoul)
# 충청북도, 전라남도, 경상북도(1970 ~ 2017)
col_years = list(map(str, range(1970,2018)))
df_4 = df_seoul.loc[['충청북도','전라남도','경상북도','강원도'], col_years]
df_4 = df_4.T
#면적 그래프 그리기
df_4.plot(kind='area', stacked=False, alpha=0.2)
plt.title('서울 >>>> 타시도 이동 인구수')
plt.xlabel('기간')
plt.ylabel('이동 인구수')
plt.show()
<ipython-input-43-ba92b9f88fc8>:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-43-ba92b9f88fc8>:13: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
수직 막대 그래프¶
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'NanumBarunGothic'
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')
df = df.fillna(method = "ffill")
mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)
#display(df_seoul)
# 충청북도, 전라남도, 경상북도(2000 ~ 2017)
col_years = list(map(str, range(2000,2018)))
df_4 = df_seoul.loc[['충청북도','전라남도','경상북도','강원도'], col_years]
df_4 = df_4.T
#수직막대 그래프 그리기
df_4.plot(kind='bar', figsize=(20, 10), width=0.7, color=['yellow', 'green', 'blue', 'skyblue'])
plt.title('서울 >>>> 타시도 이동 인구수')
plt.xlabel('기간')
plt.ylabel('이동 인구수')
plt.show()
<ipython-input-46-7a99440dfe86>:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-46-7a99440dfe86>:13: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
수평 막대 그래프¶
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'NanumBarunGothic'
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')
df = df.fillna(method = "ffill")
mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)
#display(df_seoul)
# 충청북도, 전라남도, 경상북도(2000 ~ 2017)
col_years = list(map(str, range(2000,2018)))
df_4 = df_seoul.loc[['충청북도','전라남도','경상북도','강원도'], col_years]
df_4['합계'] = df_4.sum(axis = 1)
df_total = df_4[['합계']].sort_values(by='합계', ascending=False)
display(df_total)
#수평막대 그래프 그리기
df_total.plot(kind='barh', figsize=(20, 10), width=0.7)
plt.title('서울 >>>> 타시도 이동 인구수')
plt.xlabel('기간')
plt.ylabel('이동 인구수')
plt.show()
<ipython-input-51-b4bddc1a7b37>:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-51-b4bddc1a7b37>:13: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
| 합계 | |
|---|---|
| 전입지 | |
| 강원도 | 398481.0 |
| 전라남도 | 302765.0 |
| 경상북도 | 265010.0 |
| 충청북도 | 260973.0 |
히스토그램¶
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('classic')
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/auto-mpg.xlsx', header=None)
df.columns = ['mpg','cylinders','displacement','hoursepower','weight','acceleration','model_year','origin','name']
display(df.head())
df['mpg'].plot(kind ='hist', bins = 10, figsize = (10,5))
plt.title('Histogram')
plt.xlabel('mpg')
plt.show()
| mpg | cylinders | displacement | hoursepower | weight | acceleration | model_year | origin | name | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 18.0 | 8 | 307.0 | 130 | 3504 | 12.0 | 70 | 1 | chevrolet chevelle malibu |
| 1 | 15.0 | 8 | 350.0 | 165 | 3693 | 11.5 | 70 | 1 | buick skylark 320 |
| 2 | 18.0 | 8 | 318.0 | 150 | 3436 | 11.0 | 70 | 1 | plymouth satellite |
| 3 | 16.0 | 8 | 304.0 | 150 | 3433 | 12.0 | 70 | 1 | amc rebel sst |
| 4 | 17.0 | 8 | 302.0 | 140 | 3449 | 10.5 | 70 | 1 | ford torino |
산점도¶
In [ ]:
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('classic')
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/auto-mpg.xlsx', header=None)
df.columns = ['mpg','cylinders','displacement','hoursepower','weight','acceleration','model_year','origin','name']
display(df.head())
cylinder_size = df['cylinders'] / df['cylinders'].max() * 300
df.plot(x='mpg', y='weight', kind='scatter', color='coral', s = cylinder_size, figsize=(10, 5), alpha = 0.6)
plt.title('scatter')
plt.show()
| mpg | cylinders | displacement | hoursepower | weight | acceleration | model_year | origin | name | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 18.0 | 8 | 307.0 | 130 | 3504 | 12.0 | 70 | 1 | chevrolet chevelle malibu |
| 1 | 15.0 | 8 | 350.0 | 165 | 3693 | 11.5 | 70 | 1 | buick skylark 320 |
| 2 | 18.0 | 8 | 318.0 | 150 | 3436 | 11.0 | 70 | 1 | plymouth satellite |
| 3 | 16.0 | 8 | 304.0 | 150 | 3433 | 12.0 | 70 | 1 | amc rebel sst |
| 4 | 17.0 | 8 | 302.0 | 140 | 3449 | 10.5 | 70 | 1 | ford torino |
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('classic')
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/auto-mpg.xlsx', header=None)
df.columns = ['mpg','cylinders','displacement','hoursepower','weight','acceleration','model_year','origin','name']
display(df.head())
cylinder_size = df['cylinders'] / df['cylinders'].max() * 300
df.plot(x='mpg', y='weight', kind='scatter', color=cylinder_size, s = 10, figsize=(10, 5), alpha = 0.6, cmap = 'viridis')
plt.title('scatter')
plt.show()
| mpg | cylinders | displacement | hoursepower | weight | acceleration | model_year | origin | name | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 18.0 | 8 | 307.0 | 130 | 3504 | 12.0 | 70 | 1 | chevrolet chevelle malibu |
| 1 | 15.0 | 8 | 350.0 | 165 | 3693 | 11.5 | 70 | 1 | buick skylark 320 |
| 2 | 18.0 | 8 | 318.0 | 150 | 3436 | 11.0 | 70 | 1 | plymouth satellite |
| 3 | 16.0 | 8 | 304.0 | 150 | 3433 | 12.0 | 70 | 1 | amc rebel sst |
| 4 | 17.0 | 8 | 302.0 | 140 | 3449 | 10.5 | 70 | 1 | ford torino |
<ipython-input-67-8be327e93105>:12: UserWarning: 'color' and 'colormap' cannot be used simultaneously. Using 'color' df.plot(x='mpg', y='weight', kind='scatter', color=cylinder_size, s = 10, figsize=(10, 5), alpha = 0.6, cmap = 'viridis')
seaborn¶
In [ ]:
!pip install seaborn
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/ Requirement already satisfied: seaborn in /usr/local/lib/python3.10/dist-packages (0.12.2) Requirement already satisfied: numpy!=1.24.0,>=1.17 in /usr/local/lib/python3.10/dist-packages (from seaborn) (1.22.4) Requirement already satisfied: pandas>=0.25 in /usr/local/lib/python3.10/dist-packages (from seaborn) (1.5.3) Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in /usr/local/lib/python3.10/dist-packages (from seaborn) (3.7.1) Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.7) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0) Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.39.3) Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4) Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.1) Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (8.4.0) Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.0.9) Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.25->seaborn) (2022.7.1) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.16.0)
In [ ]:
import seaborn as sns
titanic = sns.load_dataset('titanic')
display(titanic.head())
print('\n')
display(titanic.tail())
| survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
| 1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
| 2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
| 3 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S | First | woman | False | C | Southampton | yes | False |
| 4 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S | Third | man | True | NaN | Southampton | no | True |
| survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 886 | 0 | 2 | male | 27.0 | 0 | 0 | 13.00 | S | Second | man | True | NaN | Southampton | no | True |
| 887 | 1 | 1 | female | 19.0 | 0 | 0 | 30.00 | S | First | woman | False | B | Southampton | yes | True |
| 888 | 0 | 3 | female | NaN | 1 | 2 | 23.45 | S | Third | woman | False | NaN | Southampton | no | False |
| 889 | 1 | 1 | male | 26.0 | 0 | 0 | 30.00 | C | First | man | True | C | Cherbourg | yes | True |
| 890 | 0 | 3 | male | 32.0 | 0 | 0 | 7.75 | Q | Third | man | True | NaN | Queenstown | no | True |
In [ ]:
import seaborn as sns
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('darkgrid')
fig = plt.figure(figsize = (20,10))
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
# 회귀선이 있는 산점도
sns.regplot(x = 'age',
y = 'fare',
data = titanic,
ax = ax1
)
sns.regplot(x = 'age',
y = 'fare',
data = titanic,
ax = ax2,
fit_reg = False
)
plt.show()
In [ ]:
import seaborn as sns
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('darkgrid')
fig = plt.figure(figsize = (20,10))
ax1 = fig.add_subplot(1, 3, 1)
ax2 = fig.add_subplot(1, 3, 2)
ax3 = fig.add_subplot(1, 3, 3)
#히스토그램과 커널 밀도 그래프
sns.distplot(titanic['fare'], ax = ax1)
#커널 밀도 그래프
sns.kdeplot(titanic['fare'], ax = ax2)
#히스토그램
sns.histplot(x = 'fare', data = titanic, ax = ax3)
ax1.set_title('titanic fare - dist/plot')
ax2.set_title('titanic fare - kde/plot')
ax3.set_title('titanic fare - hist/plot')
plt.show()
<ipython-input-80-76215909575e>:15: UserWarning: `distplot` is a deprecated function and will be removed in seaborn v0.14.0. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). For a guide to updating your code to use the new functions, please see https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751 sns.distplot(titanic['fare'], ax = ax1)
히트맵¶
In [ ]:
import seaborn as sns
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('darkgrid')
#데이터 재구성
table = titanic.pivot_table(index = ['sex'], columns = ['class'], aggfunc = 'size')
#히트맵
sns.heatmap(table, #데이터
annot=True, #값 표시 여부
fmt = 'd', #데이터 형식
cmap = 'YlGnBu', #컬러맵 지정
linewidth=2, #구분선 두꼐
cbar = True) #컬러바 표시 여부
plt.show()
In [ ]:
import seaborn as sns
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('darkgrid')
fig = plt.figure(figsize = (20,10))
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)
#분산을 고려하지 않은 범주형 산점도
sns.stripplot(x = 'class', #x축
y = 'age', #y축
hue = 'sex', #다중 데이터
data = titanic, #사용할 데이터
ax = ax1 #차트 위치
)
#분산을 고려한 범주형 산점도
sns.swarmplot(x = 'class', #x축
y = 'age', #y축
hue = 'sex', #다중 데이터
data = titanic, #사용할 데이터
ax = ax2 #차트 위치
)
ax1.set_title('Strip Plot')
ax2.set_title('Swarm Plot')
plt.show()
In [ ]:
import seaborn as sns
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('darkgrid')
fig = plt.figure(figsize = (20,10))
ax1 = fig.add_subplot(1, 3, 1)
ax2 = fig.add_subplot(1, 3, 2)
ax3 = fig.add_subplot(1, 3, 3)
# 막대 그래프
sns.barplot(x='sex', y='survived', data=titanic, ax=ax1)
# 누적하지 않고 추가 항목을 추가한 막대 그래프
sns.barplot(x='sex', y='survived', hue='class', data=titanic, ax=ax2)
# 누적하고 추가 항목을 추가한 막대 그래프
sns.barplot(x='sex', y='survived', hue='class', dodge=False, data=titanic, ax=ax3)
plt.show()
In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('darkgrid')
fig = plt.figure(figsize = (20,10))
ax1 = fig.add_subplot(1, 3, 1)
ax2 = fig.add_subplot(1, 3, 2)
ax3 = fig.add_subplot(1, 3, 3)
# 밀도 그래프
sns.countplot(x='class',data=titanic, ax=ax1)
# 누적하지 않고 추가 항목을 추가한 밀도 그래프
sns.countplot(x='class', hue='who', palette='Set2', data=titanic, ax=ax2)
# 누적하고 추가 항목을 추가한 밀도 그래프
sns.countplot(x='class', hue='who', palette='Set3', dodge=False, data=titanic, ax=ax3)
plt.show()
In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('darkgrid')
fig = plt.figure(figsize = (20,10))
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
ax4 = fig.add_subplot(2, 2, 4)
# 기본 박스플롯
sns.boxplot(x = 'alive', y = 'age', data=titanic, ax = ax1)
# 추가 항목이 있는 박스플롯
sns.boxplot(x = 'alive', y = 'age', hue = 'sex', data=titanic, ax = ax2)
# 기본 바이올린플롯
sns.violinplot(x = 'alive', y = 'age', data = titanic, ax = ax3)
# 추가 항목이 있는 바이올린플롯
sns.violinplot(x = 'alive', y = 'age', hue = 'sex', data = titanic, ax = ax4)
plt.show()
In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('whitegrid')
# 조인트 그래프
j1 = sns.jointplot(x = 'fare', y = 'age', data=titanic)
# 회귀선 조인트 그래프
j2 = sns.jointplot(x='fare', y = 'age', kind = 'reg', data=titanic)
# 회귀선 조인트 그래프
j3 = sns.jointplot(x='fare', y = 'age', kind = 'hex', data=titanic)
# 회귀선 조인트 그래프
j4 = sns.jointplot(x='fare', y = 'age', kind = 'kde', data=titanic)
plt.show()
In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('whitegrid')
g = sns.FacetGrid(data = titanic, col = 'who', row = 'survived')
g = g.map(plt.hist, 'age')
plt.show()
In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
titanic = sns.load_dataset('titanic')
# 테마 지정
sns.set_style('whitegrid')
# 이변수 데이터 분포
titanic_pair = titanic[['age','pclass','fare']]
p = sns.pairplot(titanic_pair)
plt.show()
In [ ]:
'데이터 분석 기초' 카테고리의 다른 글
| 딥러닝, 머신러닝 실습( 회귀분석 ) (0) | 2023.10.08 |
|---|---|
| 시계열 분석 기초(arima, sarima) (0) | 2023.07.25 |
| 기술통계, 시각화(6.8) (0) | 2023.06.08 |
| 데이터프레임 조작(6.7) (0) | 2023.06.08 |
| numpy(6.5) (0) | 2023.06.05 |