from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


import pandas as pd

df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/남북한발전전력량.xlsx', header=None)
df


df = pd.read_json('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/read_json_sample.json')
df


data = {
    'name' : ['c0', 'c1', 'c2'],
    'number1' : [0, 1, 2],
    'number2' : [3, 4, 5],
    'number3' : [6, 7, 8],


}

df = pd.DataFrame(data)
df.set_index('name', inplace=True)
display(df)

df.to_csv('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/df_csv_sample.csv')


import pandas as pd
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/auto-mpg.xlsx', header = None)
df.columns = ['mpg', 'cylinders', 'displacement', 'hourespower',
              'weight', 'acceleration', 'model_year', 'origin', 'name']
df


display('df.head()', df.head())
display('df.tail()', df.tail())

'df.head()'

'df.tail()'


df.shape

(398, 9)


df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 398 entries, 0 to 397
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   mpg           398 non-null    float64
 1   cylinders     398 non-null    int64  
 2   displacement  398 non-null    float64
 3   hourespower   398 non-null    object 
 4   weight        398 non-null    int64  
 5   acceleration  398 non-null    float64
 6   model_year    398 non-null    int64  
 7   origin        398 non-null    int64  
 8   name          398 non-null    object 
dtypes: float64(3), int64(4), object(2)
memory usage: 28.1+ KB


df.dtypes

mpg             float64
cylinders         int64
displacement    float64
hourespower      object
weight            int64
acceleration    float64
model_year        int64
origin            int64
name             object
dtype: object


df.describe()


df.describe(include = 'all')


df.count()

mpg             398
cylinders       398
displacement    398
hourespower     398
weight          398
acceleration    398
model_year      398
origin          398
name            398
dtype: int64


df['origin'].value_counts()

1    249
3     79
2     70
Name: origin, dtype: int64


df.mean()

<ipython-input-14-c61f0c8f89b5>:1: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  df.mean()

mpg               23.514573
cylinders          5.454774
displacement     193.425879
weight          2970.424623
acceleration      15.568090
model_year        76.010050
origin             1.572864
dtype: float64


df[['mpg', 'displacement']].mean()

mpg              23.514573
displacement    193.425879
dtype: float64


df.median()

<ipython-input-16-6d467abf240d>:1: FutureWarning: The default value of numeric_only in DataFrame.median is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  df.median()

mpg               23.0
cylinders          4.0
displacement     148.5
weight          2803.5
acceleration      15.5
model_year        76.0
origin             1.0
dtype: float64


df[['mpg', 'displacement']].median()

mpg              23.0
displacement    148.5
dtype: float64


df.max()

<ipython-input-18-4c1ddf8920ff>:1: FutureWarning: The default value of numeric_only in DataFrame.max is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  df.max()

mpg                         46.6
cylinders                      8
displacement               455.0
weight                      5140
acceleration                24.8
model_year                    82
origin                         3
name            vw rabbit custom
dtype: object


df[['mpg', 'displacement']].max()

mpg              46.6
displacement    455.0
dtype: float64


df.min()

<ipython-input-20-c3612c624a3f>:1: FutureWarning: The default value of numeric_only in DataFrame.min is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  df.min()

mpg                                 9.0
cylinders                             3
displacement                       68.0
weight                             1613
acceleration                        8.0
model_year                           70
origin                                1
name            amc ambassador brougham
dtype: object


df[['mpg', 'displacement']].min()

mpg              9.0
displacement    68.0
dtype: float64


df.std()

<ipython-input-22-ce97bb7eaef8>:1: FutureWarning: The default value of numeric_only in DataFrame.std is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  df.std()

mpg               7.815984
cylinders         1.701004
displacement    104.269838
weight          846.841774
acceleration      2.757689
model_year        3.697627
origin            0.802055
dtype: float64


df[['mpg', 'displacement']].std()

mpg               7.815984
displacement    104.269838
dtype: float64


df.var()

<ipython-input-24-28ded241fd7c>:1: FutureWarning: The default value of numeric_only in DataFrame.var is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.
  df.var()

mpg                 61.089611
cylinders            2.893415
displacement     10872.199152
weight          717140.990526
acceleration         7.604848
model_year          13.672443
origin               0.643292
dtype: float64


df[['mpg', 'displacement']].var()

mpg                61.089611
displacement    10872.199152
dtype: float64


df.corr()

<ipython-input-26-2f6f6606aa2c>:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  df.corr()


import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# 상관관계 계산
corr_matrix = df.corr()

# 히트맵 그리기
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

<ipython-input-27-e60676ae1155>:6: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  corr_matrix = df.corr()


df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/남북한발전전력량.xlsx')
df


df_ns = df.iloc[[0,5],2:]
df_ns.index = ['south','north']
df_ns.columns = df_ns.columns.map(int)
display(df_ns)

df_ns.plot()

<Axes: >


df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/남북한발전전력량.xlsx')
df_ns = df.iloc[[0,5],2:]
df_ns.index = ['south','north']
df_ns.columns = df_ns.columns.map(int)
df_ns = df_ns.T
display(df_ns.head(5))

df_ns.plot()

<Axes: >


df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/남북한발전전력량.xlsx')
df_ns = df.iloc[[0,5],2:]
df_ns.index = ['south','north']
df_ns.columns = df_ns.columns.map(int)
df_ns = df_ns.T
display(df_ns.head(5))

df_ns.plot(kind = 'bar')

<Axes: >


import pandas as pd
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/auto-mpg.xlsx', header = None)
df.columns = ['mpg', 'cylinders', 'displacement', 'hourespower',
              'weight', 'acceleration', 'model_year', 'origin', 'name']
df.plot(x = 'mpg', y = 'weight', kind = 'scatter')

<Axes: xlabel='mpg', ylabel='weight'>


import pandas as pd
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/auto-mpg.xlsx', header = None)
df.columns = ['mpg', 'cylinders', 'displacement', 'hourespower',
              'weight', 'acceleration', 'model_year', 'origin', 'name']

df['mpg'].plot(kind='box')

<Axes: >


import pandas as pd
df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')
df.head()


df = df.fillna(method='ffill')
df


#설치후 런타임 재시작
#!sudo apt-get install -y fonts-nanum
#!sudo fc-cache -fv
#!rm ~/.cache/matplotlib -rf


import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'NanumBarunGothic'

df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')

df = df.fillna(method = "ffill")

mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)

sr_gg = df_seoul.loc['경기도']

# 그림판 크기 확장
plt.figure(figsize = (14, 5))

plt.plot(sr_gg.index, sr_gg.values)

# x축의 눈금 이름 기울이기
plt.xticks(rotation = 90)
# plt.xticks(rotation = "vertical")

plt.title("서울 -> 경기 이동 인구수")

plt.xlabel("기간")
plt.ylabel("이동 인구수")

plt.show()

<ipython-input-41-261a7f2f8e73>:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-41-261a7f2f8e73>:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)


import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'NanumBarunGothic'

df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')

df = df.fillna(method = "ffill")

mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)

sr_gg = df_seoul.loc['경기도']

# 그림판 테마 바꾸기
plt.style.use('ggplot')

plt.figure(figsize = (14, 5))

# 차트를 그릴 때 여러 속성 넣기 가능
plt.plot(sr_gg.index, sr_gg.values, marker = "o", markersize = 10)

plt.xticks(rotation = 90)

plt.title("서울 -> 경기 이동 인구수")

plt.xlabel("기간")
plt.ylabel("이동 인구수")

plt.show()

<ipython-input-43-c5af1814a28a>:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-43-c5af1814a28a>:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)


import matplotlib.pyplot as plt

print(plt.style.available)

['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']


import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.family'] = 'NanumBarunGothic'

df = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/2023코랩/csv모음/시도별 전출입 인구수.xlsx')

df = df.fillna(method = "ffill")

mask = (df['전출지별'] == "서울특별시") & (df['전입지별'] != "서울특별시")
df_seoul = df[mask]
df_seoul.drop(['전출지별'], axis=1, inplace = True)
df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)
df_seoul.set_index('전입지', inplace=True)

sr_gg = df_seoul.loc['경기도']

plt.style.use('ggplot')

plt.figure(figsize = (14, 5))

plt.plot(sr_gg.index, sr_gg.values, marker = "o", markersize = 10)

# y축 값의 범위 지정 가능
plt.ylim(50000, 800000)

# 화살표 표시
plt.annotate('',                  # 텍스트
             xy=(20, 550000),     # 머리 부분
             xytext=(3, 350000),  # 꼬리 부분
             xycoords='data',     # 좌표 체계
             arrowprops=dict(arrowstyle='->', color='skyblue', lw=5))   # 화살표 특성

plt.annotate('인구 이동 증가',       # 텍스트
             xy=(10, 470000),     # 텍스트 기준
             rotation = 21,       # 기울임(시계 반대 방향)
             va="center",         # 수직(위 / 아래) - (top / bottom)
             ha="center",         # 수평(좌 / 우) - (left / right)
             fontsize=15)         # 글자 크기

# 화살표 표시
plt.annotate('',                  # 텍스트
             xy=(45, 430000),     # 머리 부분
             xytext=(34, 560000), # 꼬리 부분
             xycoords='data',     # 좌표 체계
             arrowprops=dict(arrowstyle='->', color='skyblue', lw=5))   # 화살표 특성

plt.annotate('인구 이동 감소',       # 텍스트
             xy=(40, 520000),     # 텍스트 기준
             rotation = -21,      # 기울임(시계 반대 방향)
             va="center",         # 수직(위 / 아래) - (top / bottom)
             ha="center",         # 수평(좌 / 우) - (left / right)
             fontsize=15)         # 글자 크기

plt.xticks(rotation = 90)

plt.title("서울 -> 경기 이동 인구수")

plt.xlabel("기간")
plt.ylabel("이동 인구수")

plt.show()

<ipython-input-45-f269fce5f4b0>:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seoul.drop(['전출지별'], axis=1, inplace = True)
<ipython-input-45-f269fce5f4b0>:13: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_seoul.rename({'전입지별':'전입지'}, axis=1, inplace=True)

	0	1	2	3	4	5	6	7	8	9	...	23	24	25	26	27	28	29	30	31	32
0	전력량 (억㎾h)	발전 전력별	1990	1991	1992	1993	1994	1995	1996	1997	...	2011	2012	2013	2014	2015	2016	2017	2018	2019	2020
1	남한	합계	1077	1186	1310	1444	1650	1847	2055	2244	...	4969	5096	5171	5220	5281	5404	5535	5707	5608	5450
2	NaN	수력	64	51	49	60	41	55	52	54	...	78	77	84	78	58	66	70	73	62	71
3	NaN	화력	484	573	696	803	1022	1122	1264	1420	...	3343	3430	3581	3427	3402	3523	3738	4018	3774	3466
4	NaN	원자력	529	563	565	581	587	670	739	771	...	1547	1503	1388	1564	1648	1620	1484	1335	1459	1602
5	NaN	신재생	-	-	-	-	-	-	-	-	...	-	86	118	151	173	195	243	281	313	311
6	북한	합계	277	263	247	221	231	230	213	193	...	211	215	221	216	190	239	235	249	238	182
7	NaN	수력	156	150	142	133	138	142	125	107	...	132	135	139	130	100	128	119	128	110	128
8	NaN	화력	121	113	105	88	93	88	88	86	...	79	80	82	86	90	111	116	121	128	53.8
9	NaN	원자력	-	-	-	-	-	-	-	-	...	-	-	-	-	-	-	-	-	-	-

	year	developer	opensource
pandas	2008	Wes Mckinneye	True
NumPy	2006	Travis Oliphant	True
matplotlib	2003	John D. Hunter	True

	mpg	cylinders	displacement	hourespower	weight	acceleration	model_year	origin	name
0	18.0	8	307.0	130	3504	12.0	70	1	chevrolet chevelle malibu
1	15.0	8	350.0	165	3693	11.5	70	1	buick skylark 320
2	18.0	8	318.0	150	3436	11.0	70	1	plymouth satellite
3	16.0	8	304.0	150	3433	12.0	70	1	amc rebel sst
4	17.0	8	302.0	140	3449	10.5	70	1	ford torino
...	...	...	...	...	...	...	...	...	...
393	27.0	4	140.0	86	2790	15.6	82	1	ford mustang gl
394	44.0	4	97.0	52	2130	24.6	82	2	vw pickup
395	32.0	4	135.0	84	2295	11.6	82	1	dodge rampage
396	28.0	4	120.0	79	2625	18.6	82	1	ford ranger
397	31.0	4	119.0	82	2720	19.4	82	1	chevy s-10

	mpg	cylinders	displacement	hourespower	weight	acceleration	model_year	origin	name
0	18.0	8	307.0	130	3504	12.0	70	1	chevrolet chevelle malibu
1	15.0	8	350.0	165	3693	11.5	70	1	buick skylark 320
2	18.0	8	318.0	150	3436	11.0	70	1	plymouth satellite
3	16.0	8	304.0	150	3433	12.0	70	1	amc rebel sst
4	17.0	8	302.0	140	3449	10.5	70	1	ford torino

	mpg	cylinders	displacement	hourespower	weight	acceleration	model_year	origin	name
393	27.0	4	140.0	86	2790	15.6	82	1	ford mustang gl
394	44.0	4	97.0	52	2130	24.6	82	2	vw pickup
395	32.0	4	135.0	84	2295	11.6	82	1	dodge rampage
396	28.0	4	120.0	79	2625	18.6	82	1	ford ranger
397	31.0	4	119.0	82	2720	19.4	82	1	chevy s-10

기술통계, 시각화(6.8)

엑셀 파일 불러오기¶

json 파일 불러오기¶

csv파일로 저장하기¶

컬럼 설정하기¶

데이터 미리보기¶

데이터프레임 크기 확인¶

데이터프레임 정보 확인¶

컬럼들 기술통계 요약 정보¶

데이터의 개수 뽑아오기¶

특정 열의 데이터 개수 출출¶

통계함수¶

평균값¶

중간값¶

최댓값¶

최솟값¶

표준편차¶

분산¶

상관계수¶

데이터의 관계확인¶

판다스 내장 그래프 확인¶

matplotlib 시각화 도구¶

서울특별시에서 다른 곳으로 이동한 인구 수 추이¶

그림판 쪼개기¶

1. figure >>> 그림판 크기 지정 >>> 객체화¶

2. axe객체 생성(add-subplot())¶

	mpg	cylinders	displacement	weight	acceleration	model_year	origin
count	398.000000	398.000000	398.000000	398.000000	398.000000	398.000000	398.000000
mean	23.514573	5.454774	193.425879	2970.424623	15.568090	76.010050	1.572864
std	7.815984	1.701004	104.269838	846.841774	2.757689	3.697627	0.802055
min	9.000000	3.000000	68.000000	1613.000000	8.000000	70.000000	1.000000
25%	17.500000	4.000000	104.250000	2223.750000	13.825000	73.000000	1.000000
50%	23.000000	4.000000	148.500000	2803.500000	15.500000	76.000000	1.000000
75%	29.000000	8.000000	262.000000	3608.000000	17.175000	79.000000	2.000000
max	46.600000	8.000000	455.000000	5140.000000	24.800000	82.000000	3.000000

	mpg	cylinders	displacement	weight	acceleration	model_year	origin
mpg	1.000000	-0.775396	-0.804203	-0.831741	0.420289	0.579267	0.563450
cylinders	-0.775396	1.000000	0.950721	0.896017	-0.505419	-0.348746	-0.562543
displacement	-0.804203	0.950721	1.000000	0.932824	-0.543684	-0.370164	-0.609409
weight	-0.831741	0.896017	0.932824	1.000000	-0.417457	-0.306564	-0.581024
acceleration	0.420289	-0.505419	-0.543684	-0.417457	1.000000	0.288137	0.205873
model_year	0.579267	-0.348746	-0.370164	-0.306564	0.288137	1.000000	0.180662
origin	0.563450	-0.562543	-0.609409	-0.581024	0.205873	0.180662	1.000000

	1990	1991	1992	1993	1994	1995	1996	1997	1998	1999	...	2011	2012	2013	2014	2015	2016	2017	2018	2019	2020
south	1077	1186	1310	1444	1650	1847	2055	2244	2153	2393	...	4969	5096	5171	5220	5281	5404	5535	5707	5608	5450
north	277	263	247	221	231	230	213	193	170	186	...	211	215	221	216	190	239	235	249	238	182

	전출지별	전입지별	1970	1971	1972	1973	1974	1975	1976	1977	...	2008	2009	2010	2011	2012	2013	2014	2015	2016	2017
0	전출지별	전입지별	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	...	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)	이동자수 (명)
1	전국	전국	4046536	4210164	3687938	4860418	5297969	9011440	6773250	7397623	...	8808256	8487275	8226594	8127195	7506691	7411784	7629098	7755286	7378430	7154226
2	NaN	서울특별시	1742813	1671705	1349333	1831858	2050392	3396662	2756510	2893403	...	2025358	1873188	1733015	1721748	1555281	1520090	1573594	1589431	1515602	1472937
3	NaN	부산광역시	448577	389797	362202	482061	680984	805979	724664	785117	...	514502	519310	519334	508043	461042	478451	485710	507031	459015	439073
4	NaN	대구광역시	-	-	-	-	-	-	-	-	...	409938	398626	370817	370563	348642	351873	350213	351424	328228	321182

	number1	number2	number3
name
c0	0	3	6
c1	1	4	7
c2	2	5	8