새소식

TIL

한국 주요 기업 주가 간의 상관관계 분석

  • -
import yfinance as yf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Column label -> Yahoo Finance ticker symbol. The dict order fixes the column
# order of combined_df.
TICKERS = {
    'Naver': '035420.KS',
    'Samsung': '005930.KS',
    'Hynix': '000660.KS',
    'Hyundai': '005380.KS',
    'Kia': '000270.KS',
    'Kakao': '035720.KS',
    'LG': '066570.KS',
    'Posco': '005490.KS',
    'KB': '105560.KS',
    'Shinhan': '055550.KS',
}
START_DATE = '2016-01-01'


def _download_close(ticker, start=START_DATE):
    """Download price history for *ticker* and return its closing prices.

    Args:
        ticker: Yahoo Finance ticker symbol passed to ``yf.download``.
        start: First date to request, as a ``YYYY-MM-DD`` string.

    Returns:
        A pandas Series named ``'Close'`` indexed like the downloaded frame.

    Raises:
        ValueError: If the download returned no rows. Without this check a
            single failed download would silently empty the inner join below.
    """
    prices = yf.download(ticker, start=start)
    if prices.empty:
        raise ValueError(f'No price data downloaded for {ticker}')
    close = prices['Close']
    # Depending on the yfinance version, the columns may be a
    # (field, ticker) MultiIndex even for a single ticker, in which case
    # 'Close' selects a one-column DataFrame rather than a Series.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    return close.rename('Close')


# One closing-price Series per company, in TICKERS order.
close_prices = [_download_close(ticker) for ticker in TICKERS.values()]

# Merge into a single frame; the inner join keeps only dates on which every
# company has a closing price.
combined_df = pd.concat(close_prices, axis=1, join='inner')
combined_df.columns = list(TICKERS)

# Pearson correlation between every pair of closing-price columns.
corr_matrix = combined_df.corr(method='pearson')

# Draw the annotated heatmap on a fresh 10x10 figure. seaborn returns the
# axes it drew on, so the title is set on that object directly.
plt.figure(figsize=(10, 10))
pearson_ax = sns.heatmap(
    corr_matrix,
    cmap='coolwarm',
    annot=True,
    square=True,
)
pearson_ax.set_title('Pearson Correlation Heatmap')
plt.show()

from statsmodels.tsa.stattools import grangercausalitytests

def _zero_lag_cross_correlation(x, y):
    """Return the normalized zero-lag cross-correlation of two 1-D arrays.

    Both inputs are mean-centred before the product is taken. Without
    centring, the result is dominated by the level of the series and is not
    bounded to [-1, 1]. With centring, the zero-lag value is numerically the
    same quantity as the Pearson coefficient.

    Args:
        x: 1-D array of observations.
        y: 1-D array of observations, same length as *x*.

    Returns:
        The coefficient as a float, or NaN when either input has zero variance.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    scale = np.std(x) * np.std(y) * len(x)
    if scale == 0:
        return np.nan
    # The zero-lag term of np.correlate(..., mode='full') is just the dot
    # product, so compute it directly instead of building every lag.
    return np.dot(x - x.mean(), y - y.mean()) / scale


# Spearman rank correlation coefficient.
spearman_corr_matrix = combined_df.corr(method='spearman')

# Kendall's tau.
kendall_corr_matrix = combined_df.corr(method='kendall')

# Zero-lag cross-correlation, evaluated pairwise through pandas' callable hook.
cross_corr_matrix = combined_df.corr(method=_zero_lag_cross_correlation)

# Granger causality test
def granger_causality_matrix(data, maxlag=1):
    """Build a matrix of pairwise Granger-causality p-values.

    For each ordered pair of distinct columns, runs statsmodels'
    ``grangercausalitytests`` and records the smallest SSR-based F-test
    p-value across lags ``1..maxlag``.

    Orientation: ``grangercausalitytests`` tests whether the second column
    of its input Granger-causes the first, so ``result.loc[effect, cause]``
    is the p-value for "cause Granger-causes effect". Columns are candidate
    causes, rows are the affected series.

    The diagonal is left as NaN. Testing a series against itself gives a
    perfectly collinear regression whose p-value carries no information.

    Args:
        data: DataFrame with one time series per column.
        maxlag: Highest lag to test; every lag from 1 to maxlag is evaluated.

    Returns:
        Square DataFrame of p-values indexed and labelled by ``data.columns``.
    """
    # NOTE(review): Granger tests assume stationary inputs; confirm whether
    # callers should pass returns/differences rather than raw price levels.
    variables = data.columns
    # Start from NaN rather than 0 so an unset cell can never be mistaken
    # for a highly significant p-value.
    gc_matrix = pd.DataFrame(np.nan, index=variables, columns=variables, dtype=float)
    lags = range(1, maxlag + 1)
    for cause in variables:
        for effect in variables:
            if cause == effect:
                continue
            test_result = grangercausalitytests(data[[effect, cause]], maxlag=maxlag, verbose=False)
            # test_result[lag][0]['ssr_ftest'] is (F, p-value, df_denom, df_num).
            gc_matrix.loc[effect, cause] = min(
                test_result[lag][0]['ssr_ftest'][1] for lag in lags
            )
    return gc_matrix

# Keep the result under its own name. Assigning it back to
# `granger_causality_matrix` would replace the function with a DataFrame and
# make any later call (e.g. re-running this cell) fail with a TypeError.
granger_p_values = granger_causality_matrix(combined_df, maxlag=1)

# Draw the four matrices on a 2x2 grid of heatmaps.
fig, axes = plt.subplots(2, 2, figsize=(20, 20))

heatmap_panels = [
    (spearman_corr_matrix, 'Spearman Rank Correlation Coefficient Heatmap', axes[0, 0]),
    (kendall_corr_matrix, "Kendall's Tau Heatmap", axes[0, 1]),
    (cross_corr_matrix, 'Cross-correlation Heatmap', axes[1, 0]),
    (granger_p_values, 'Granger Causality Test Heatmap', axes[1, 1]),
]
for matrix, title, panel_ax in heatmap_panels:
    sns.heatmap(matrix, annot=True, cmap='coolwarm', square=True, ax=panel_ax)
    panel_ax.set_title(title)

plt.show()

Contents

포스팅 주소를 복사했습니다

이 글이 도움이 되었다면 공감 부탁드립니다.