Close_TAIEX -N.Scatter Plot Matrix_0630

Jun 30, 2024

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Make the Drive folder holding the dataset reachable from this Colab runtime
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Read the raw CSV from Drive
file_path = '/content/drive/My Drive/MSCI_Taiwan_30_data.csv'
data = pd.read_csv(file_path)

# Keep the numeric columns only
numerical_data = data.select_dtypes(include=[np.number])


def _mask_non_finite(values):
    """Return *values* with every NaN, +inf and -inf entry replaced by NaN."""
    return np.where(np.isfinite(values), values, np.nan)


# Imputation in two passes: non-finite entries become NaN, then every NaN
# takes the mean of its own column.
# NOTE(review): mean-imputed rows still count as observations in the
# correlations computed from this frame -- confirm that is intended.
numerical_data = numerical_data.apply(_mask_non_finite)
numerical_data = numerical_data.fillna(numerical_data.mean())

# Pearson r (with its p-value) between 'Close_TAIEX' and every other numeric column
target_variable = 'Close_TAIEX'
correlation_results = {}

feature_names = (name for name in numerical_data.columns if name != target_variable)
for feature in feature_names:
    r_value, p_val = pearsonr(numerical_data[target_variable], numerical_data[feature])
    correlation_results[feature] = {'Pearson Correlation': r_value, 'P-value': p_val}

# Tabulate the results: one row per feature, largest correlation first
correlation_df = (
    pd.DataFrame.from_dict(correlation_results, orient='index')
    .sort_values(by='Pearson Correlation', ascending=False)
)

print("Pearson Correlation with Close_TAIEX and corresponding P-values:")
print(correlation_df)

# Plot one scatter panel per explanatory variable against Close_TAIEX.
# The grid is sized from the number of variables instead of a fixed 6 x 6,
# which raised a ValueError as soon as there were more than 36 panels.
n_cols = 6
n_panels = len(correlation_df.index)
n_rows = max(1, (n_panels + n_cols - 1) // n_cols)  # ceiling division, at least one row

# 20 x 20 inches corresponds to the six-row layout; scale the height with the
# row count so each panel keeps the same size.
plt.figure(figsize=(20, 20 * n_rows / 6))
for i, column in enumerate(correlation_df.index):
    r_value = correlation_df.loc[column, "Pearson Correlation"]
    p_val = correlation_df.loc[column, "P-value"]

    plt.subplot(n_rows, n_cols, i + 1)
    sns.scatterplot(x=numerical_data[target_variable], y=numerical_data[column])
    plt.title(f'{column} vs. {target_variable}\n(r={r_value:.2f}, p={p_val:.2e})')
    plt.xlabel(target_variable)
    plt.ylabel(column)
plt.tight_layout()
plt.show()

import pandas as pd import yfinance as yf import ta from datetime import datetime

Mounted at /content/drive
Pearson Correlation with Close_TAIEX and corresponding P-values:
                Pearson Correlation        P-value
RSI21                      0.181235  1.788855e-239
RSI14                      0.149536  7.892582e-163
CMO14                      0.097049   2.915690e-69
RSI7                       0.094208   2.352703e-65
Aroon Up                   0.092640   3.003308e-63
CCI20                      0.085391   5.606595e-54
MACD Line                  0.080482   3.944215e-48
Signal Line                0.079971   1.530058e-47
%D                         0.065723   1.223437e-32
%K                         0.062314   1.670302e-29
WILLR14                    0.062314   1.670302e-29
Market Return              0.053534   3.417693e-22
Adj_Close                  0.040832   1.508200e-13
Lower Band                 0.025166   5.331465e-06
Stock Return               0.024956   6.379107e-06
Low                        0.022923   3.390795e-05
Close_MSCI                 0.022754   3.870897e-05
Open                       0.022309   5.473519e-05
MA7                        0.022306   5.485074e-05
High                       0.022106   6.395461e-05
Middle Band                0.021395   1.092130e-04
MA21                       0.021350   1.128899e-04
MA50                       0.020800   1.688435e-04
MA100                      0.018208   9.919776e-04
Upper Band                 0.018121   1.049533e-03
MACD Histogram             0.012237   2.691003e-02
Volume                    -0.014191   1.028059e-02
Band Width                -0.021713   8.609264e-05
Aroon Down                -0.081898   8.808426e-50
Beta_120                  -0.124150  1.815258e-112
Beta_60                   -0.164698  1.474482e-197

import pandas as pd import yfinance as yf import ta from datetime import datetime

下表將相關技術指標分為微弱相關、正相關與負相關三類，並列出各指標的相關係數（r）與 p 值：

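| 類別 | 指標 | r 值 | p 值 |
| --- | --- | --- | --- |
| 微弱相關 | RSI21 | -0.01 | 7.19e-239 |
| 微弱相關 | RSI14 | 0.03 | 7.19e-168 |
| 微弱相關 | RSI7 | -0.09 | 2.73e-56 |
| 微弱相關 | Aroon Up | 0.03 | 9.03e-63 |
| 微弱相關 | Market Return | 0.09 | 2.43e-22 |
| 微弱相關 | Adj Close | 0.04 | 1.15e-13 |
| 微弱相關 | Lower Band | 0.03 | 5.33e-06 |
| 微弱相關 | Stock Return | 0.02 | 3.16e-06 |
| 微弱相關 | Low | 0.02 | 3.39e-05 |
| 微弱相關 | Close MSCI | 0.02 | 3.78e-05 |
| 微弱相關 | Open | 0.02 | 5.47e-05 |
| 微弱相關 | MA7 | 0.02 | 4.95e-05 |
| 微弱相關 | High | 0.02 | 6.46e-05 |
| 微弱相關 | Middle Band | 0.02 | 9.96e-04 |
| 微弱相關 | MA21 | 0.02 | 1.34e-04 |
| 微弱相關 | MA50 | 0.02 | 1.96e-04 |
| 微弱相關 | MA10 | 0.02 | 9.92e-04 |
| 微弱相關 | Upper Band | 0.02 | 5.05e-03 |
| 微弱相關 | MACD Histogram | 0.01 | 3.10e-02 |
| 微弱相關 | Band Width | -0.02 | 8.16e-05 |
| 正相關 | RSI14 | 0.03 | 7.19e-168 |
| 正相關 | CCI20 | 0.09 | 9.15e-54 |
| 正相關 | MACD Line | 0.05 | 1.11e-45 |
| 正相關 | Signal Line | 0.05 | 1.71e-42 |
| 正相關 | WILLR14 | 0.09 | 1.67e-29 |
| 負相關 | CMO14 | -0.19 | 2.92e-69 |
| 負相關 | RSI7 | -0.09 | 2.73e-56 |
| 負相關 | %D | -0.07 | 1.12e-29 |
| 負相關 | %K | -0.06 | 1.47e-29 |
| 負相關 | Aroon Down | -0.08 | 8.18e-50 |
| 負相關 | Beta 120 | -0.12 | 1.82e-112 |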

變數分群的討論

OPEN_High_Low_Close

Code

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr

# Attach Google Drive to the Colab runtime
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Read the CSV and parse its 'Date' column (YYYY/MM/DD text) in one chain
file_path = '/content/drive/My Drive/MSCI_Taiwan_30_data.csv'
data = pd.read_csv(file_path).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y/%m/%d')
)

# Column that every other variable is plotted against
target_variable = 'Close_TAIEX'

# Function to create scatter plot matrix by variable
def scatter_plot_matrix_by_variable():
    """Draw, for each explanatory variable, one TAIEX scatter panel per stock.

    Reads the module-level ``data`` frame and ``target_variable`` name. For
    every numeric column other than 'Date', 'ST_Code', 'ST_Name' and the
    target, a 25 x 25 inch figure is built with five panels per row, one
    panel per distinct ``ST_Code``. Each panel shows the variable against the
    target, a dashed red regression line with a 95% band, and the Pearson r
    and p-value in its title. The figure is saved to
    '/content/drive/My Drive/scatter_plot_matrix_<variable>.png', shown, and
    then closed.

    Returns:
        None.
    """
    excluded = {'Date', 'ST_Code', 'ST_Name', target_variable}
    # pearsonr only accepts numeric input, so text columns are skipped
    # instead of crashing halfway through the run.
    variables = [
        col for col in data.columns
        if col not in excluded and pd.api.types.is_numeric_dtype(data[col])
    ]

    # Loop-invariant work: the stock list, the grid shape and the per-stock
    # row subsets are the same for every variable.
    unique_stock_codes = data['ST_Code'].unique()
    n_cols = 5
    n_rows = (len(unique_stock_codes) + n_cols - 1) // n_cols
    stock_frames = [
        (stock_code, data[data['ST_Code'] == stock_code])
        for stock_code in unique_stock_codes
    ]

    for variable in variables:
        fig = plt.figure(figsize=(25, 25))

        for i, (stock_code, stock_data) in enumerate(stock_frames):
            # The stock's rows already carry the target column, so no merge
            # is needed. Only the two plotted columns decide which rows are
            # dropped; a NaN in an unrelated indicator no longer discards the
            # observation.
            pair_data = (
                stock_data[[target_variable, variable]]
                .replace([np.inf, -np.inf], np.nan)
                .dropna()
            )

            # pearsonr needs at least two observations.
            if len(pair_data) < 2:
                continue

            correlation, p_value = pearsonr(pair_data[target_variable], pair_data[variable])
            plt.subplot(n_rows, n_cols, i + 1)
            sns.scatterplot(x=pair_data[target_variable], y=pair_data[variable])
            sns.regplot(x=pair_data[target_variable], y=pair_data[variable], scatter=False, color='red', ci=95, line_kws={'linestyle': 'dashed'})

            # First non-missing name recorded for this stock code
            known_names = stock_data['ST_Name'].dropna()
            stock_name = known_names.iloc[0] if not known_names.empty else ''
            plt.title(f'{stock_code} ({stock_name})\n(r={correlation:.2f}, p={p_value:.2e})')
            plt.xlabel('TAIEX')
            plt.ylabel(variable)

        plt.suptitle(f'Scatter Plot Matrix for {variable}', fontsize=20)
        plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave the top 5% for the suptitle
        plt.savefig(f'/content/drive/My Drive/scatter_plot_matrix_{variable}.png')
        plt.show()
        # One large figure is created per variable; release it before the next.
        plt.close(fig)

# Run the plotting routine: it shows one figure per explanatory variable and
# saves each one as a PNG under '/content/drive/My Drive/'.
scatter_plot_matrix_by_variable()

import pandas as pd import yfinance as yf import ta from datetime import datetime

小島視角 | Island Vantage

Discussion about this post

Ready for more?