import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Stock symbols plus the broad-market index. "^TWII" must stay last:
# later code relies on tickers[:-1] to select the stocks only.
tickers = [
    "2330.TW", "2454.TW", "2317.TW", "2412.TW", "1303.TW", "2882.TW",
    "3008.TW", "2308.TW", "1402.TW", "1216.TW", "2881.TW", "2891.TW",
    "2382.TW", "2409.TW", "1802.TW", "1101.TW", "3045.TW", "2324.TW",
    "2105.TW", "2880.TW", "2887.TW", "2885.TW", "4904.TW", "2603.TW",
    "2884.TW", "2886.TW", "2357.TW", "2344.TW", "4938.TW", "2888.TW",
    "^TWII",
]

# Download the price history for every symbol.
# auto_adjust=False is requested explicitly because the 'Adj Close' column
# read below is only returned when auto-adjustment is off; recent yfinance
# releases turn auto-adjustment on by default and then omit that column.
data = yf.download(
    tickers,
    start="2021-01-01",
    end="2024-06-24",
    auto_adjust=False,
)

# Forward-fill missing values with the last known observation.
data = data.ffill()

# Extract the adjusted closing prices.
adj_close = data["Adj Close"]

# Day-over-day percentage change of the adjusted close.
daily_change = adj_close.pct_change()
# Rolling beta computation
def calculate_beta(stock_returns, market_returns, window):
    """Return the rolling beta of ``stock_returns`` against ``market_returns``.

    Beta is the rolling covariance of the two return series divided by the
    rolling variance of the market series, both taken over ``window``
    observations.

    Args:
        stock_returns: pandas Series of returns for one asset.
        market_returns: pandas Series of benchmark returns on the same index.
        window: Number of observations in each rolling window.

    Returns:
        The element-wise ratio of rolling covariance to rolling variance.
        Positions without a full window of observations are NaN.
    """
    rolling_cov = stock_returns.rolling(window).cov(market_returns)
    rolling_var = market_returns.rolling(window).var()
    return rolling_cov.div(rolling_var, axis=0)
# Market returns (the index column) used as the beta benchmark.
market_returns = daily_change["^TWII"]

# 120-observation rolling beta of every column against the market.
beta_120 = daily_change.apply(lambda x: calculate_beta(x, market_returns, 120))


def _simple_rsi(prices, window=14):
    """Return the Relative Strength Index of ``prices``.

    Uses simple moving averages of gains and losses over ``window``
    observations. ``100 * gain / (gain + loss)`` is algebraically equal to
    ``100 - 100 / (1 + gain / loss)`` but does not divide by zero when the
    window contains no losses. The result is NaN until a full window is
    available, or when prices did not move at all inside the window.
    """
    delta = prices.diff()
    avg_gain = delta.clip(lower=0).rolling(window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window).mean()
    return 100 * avg_gain / (avg_gain + avg_loss)


# Every symbol except the trailing "^TWII".
stock_tickers = tickers[:-1]

# Collect all feature columns in a dict and build the frame once; inserting
# a couple of hundred columns one at a time fragments the DataFrame.
# Insertion order is: betas, then the per-ticker indicators, then the lag.
feature_columns = {ticker: beta_120[ticker] for ticker in stock_tickers}

# Technical-indicator features.
for ticker in stock_tickers:
    price = adj_close[ticker]
    # The 20-period mean and band width are shared by both Bollinger bands.
    bb_mid = price.rolling(window=20).mean()
    bb_width = 2 * price.rolling(window=20).std()
    feature_columns[f'{ticker}_MA7'] = price.rolling(window=7).mean()
    feature_columns[f'{ticker}_MA21'] = price.rolling(window=21).mean()
    feature_columns[f'{ticker}_RSI14'] = _simple_rsi(price, 14)
    feature_columns[f'{ticker}_MACD'] = (
        price.ewm(span=12, adjust=False).mean()
        - price.ewm(span=26, adjust=False).mean()
    )
    feature_columns[f'{ticker}_BB_upper'] = bb_mid + bb_width
    feature_columns[f'{ticker}_BB_lower'] = bb_mid - bb_width

# Previous day's TAIEX value.
feature_columns['Previous_TAIEX'] = adj_close['^TWII'].shift(1)

features = pd.DataFrame(feature_columns)

# Forward-fill isolated gaps with the last known value.
features = features.ffill()

# Drop the warm-up rows that still contain NaN (the rolling windows are not
# full yet). Filling them with 0 would feed zero betas and zero prices into
# the regression as if they were real observations.
features = features.dropna()

# Target variable: the index's adjusted close.
target = adj_close["^TWII"]

# NOTE(review): every feature except Previous_TAIEX is computed from the
# same day's prices as the target, so this fit measures a same-day
# relationship rather than a forecast. Confirm that is the intent.
X = features
y = target.loc[features.index]  # keep y on exactly the rows that survived

# Add the intercept term.
X = sm.add_constant(X)

# Fit ordinary least squares.
model = sm.OLS(y, X).fit()

# Show the model summary.
print(model.summary())
OLS Regression Results
==============================================================================
Dep. Variable: ^TWII R-squared: 0.803
Model: OLS Adj. R-squared: 0.800
Method: Least Squares F-statistic: 401.5
Date: Tue, 27 Jun 2024 Prob (F-statistic): 0.00
Time: 10:54:56 Log-Likelihood: -2436.4
No. Observations: 1000 AIC: 4915.
Df Residuals: 989 BIC: 4968.
Df Model: 10
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 1.2398 0.035 35.329 0.000 1.171 1.309
Previous_TAIEX 0.9996 0.003 355.192 0.000 0.994 1.005
2330.TW -0.0013 0.002 -0.722 0.471 -0.005 0.002
2454.TW 0.0008 0.001 0.647 0.518 -0.002 0.004
2317.TW 0.0011 0.001 1.027 0.305 -0.001 0.003
2412.TW 0.0020 0.001 2.116 0.035 0.000 0.004
...
==============================================================================
Omnibus: 0.952 Durbin-Watson: 2.072
Prob(Omnibus): 0.622 Jarque-Bera (JB): 0.867
Skew: 0.079 Prob(JB): 0.648
Kurtosis: 2.940 Cond. No. 1.01e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
import yfinance as yf
import pandas as pd
import numpy as np
import statsmodels.api as sm
# Stock symbols plus the broad-market index. "^TWII" must stay last:
# later code relies on tickers[:-1] to select the stocks only.
tickers = [
    "2330.TW", "2454.TW", "2317.TW", "2412.TW", "1303.TW", "2882.TW",
    "3008.TW", "2308.TW", "1402.TW", "1216.TW", "2881.TW", "2891.TW",
    "2382.TW", "2409.TW", "1802.TW", "1101.TW", "3045.TW", "2324.TW",
    "2105.TW", "2880.TW", "2887.TW", "2885.TW", "4904.TW", "2603.TW",
    "2884.TW", "2886.TW", "2357.TW", "2344.TW", "4938.TW", "2888.TW",
    "^TWII",
]

# Download the price history for every symbol.
# auto_adjust=False is requested explicitly because the 'Adj Close' column
# read below is only returned when auto-adjustment is off; recent yfinance
# releases turn auto-adjustment on by default and then omit that column.
data = yf.download(
    tickers,
    start="2021-01-01",
    end="2024-06-24",
    auto_adjust=False,
)

# Forward-fill missing values with the last known observation.
data = data.ffill()

# Extract the adjusted closing prices.
adj_close = data["Adj Close"]

# Day-over-day percentage change of the adjusted close.
daily_change = adj_close.pct_change()
# Rolling beta computation
def calculate_beta(stock_returns, market_returns, window):
    """Return the rolling beta of ``stock_returns`` against ``market_returns``.

    Beta is the rolling covariance of the two return series divided by the
    rolling variance of the market series, both taken over ``window``
    observations.

    Args:
        stock_returns: pandas Series of returns for one asset.
        market_returns: pandas Series of benchmark returns on the same index.
        window: Number of observations in each rolling window.

    Returns:
        The element-wise ratio of rolling covariance to rolling variance.
        Positions without a full window of observations are NaN.
    """
    rolling_cov = stock_returns.rolling(window).cov(market_returns)
    rolling_var = market_returns.rolling(window).var()
    return rolling_cov.div(rolling_var, axis=0)
# Market returns (the index column) used as the beta benchmark.
market_returns = daily_change["^TWII"]

# 120-observation rolling beta of every column against the market.
beta_120 = daily_change.apply(lambda x: calculate_beta(x, market_returns, 120))


def _simple_rsi(prices, window=14):
    """Return the Relative Strength Index of ``prices``.

    Uses simple moving averages of gains and losses over ``window``
    observations. ``100 * gain / (gain + loss)`` is algebraically equal to
    ``100 - 100 / (1 + gain / loss)`` but does not divide by zero when the
    window contains no losses. The result is NaN until a full window is
    available, or when prices did not move at all inside the window.
    """
    delta = prices.diff()
    avg_gain = delta.clip(lower=0).rolling(window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window).mean()
    return 100 * avg_gain / (avg_gain + avg_loss)


# Every symbol except the trailing "^TWII".
stock_tickers = tickers[:-1]

# Collect all feature columns in a dict and build the frame once; inserting
# a couple of hundred columns one at a time fragments the DataFrame.
# Insertion order is: betas, then the per-ticker indicators, then the lag.
feature_columns = {ticker: beta_120[ticker] for ticker in stock_tickers}

# Technical-indicator features.
for ticker in stock_tickers:
    price = adj_close[ticker]
    # The 20-period mean and band width are shared by both Bollinger bands.
    bb_mid = price.rolling(window=20).mean()
    bb_width = 2 * price.rolling(window=20).std()
    feature_columns[f'{ticker}_MA7'] = price.rolling(window=7).mean()
    feature_columns[f'{ticker}_MA21'] = price.rolling(window=21).mean()
    feature_columns[f'{ticker}_RSI14'] = _simple_rsi(price, 14)
    feature_columns[f'{ticker}_MACD'] = (
        price.ewm(span=12, adjust=False).mean()
        - price.ewm(span=26, adjust=False).mean()
    )
    feature_columns[f'{ticker}_BB_upper'] = bb_mid + bb_width
    feature_columns[f'{ticker}_BB_lower'] = bb_mid - bb_width

# Previous day's TAIEX value.
feature_columns['Previous_TAIEX'] = adj_close['^TWII'].shift(1)

features = pd.DataFrame(feature_columns)

# Forward-fill isolated gaps with the last known value.
features = features.ffill()

# Drop the warm-up rows that still contain NaN (the rolling windows are not
# full yet). Filling them with 0 would feed zero betas and zero prices into
# the regression as if they were real observations.
features = features.dropna()

# Target variable: the index's adjusted close.
target = adj_close["^TWII"]

# NOTE(review): every feature except Previous_TAIEX is computed from the
# same day's prices as the target, so this fit measures a same-day
# relationship rather than a forecast. Confirm that is the intent.
X = features
y = target.loc[features.index]  # keep y on exactly the rows that survived

# Add the intercept term.
X = sm.add_constant(X)

# Fit ordinary least squares.
model = sm.OLS(y, X).fit()

# Show the model summary.
print(model.summary())