A股配置优化:均值-方差模型应用实战

import akshare as ak
import pandas as pd
import numpy as np
from scipy.optimize import minimize

# 定义获取股票数据的函数
def get_stock_data(codes, start_date, end_date):
    """
    Download daily closing prices for a list of A-share stocks via akshare.

    Each code is fetched independently. A code that returns no rows, or whose
    fetch raises, is reported on stdout and left out of the result, so the
    returned frame may have fewer columns than `codes` has entries.

    Args:
        codes (list): Stock codes to fetch, passed to akshare as `symbol`.
        start_date (str): First date of the range, forwarded to akshare
            (the caller in this file uses 'YYYYMMDD').
        end_date (str): Last date of the range, same format as `start_date`.

    Returns:
        pandas.DataFrame: One column of closing prices per successfully
        fetched code, indexed by trading date. Empty if nothing was fetched.
    """
    closes_by_code = {}
    for symbol in codes:
        try:
            history = ak.stock_zh_a_hist(
                symbol=symbol,
                period="daily",
                start_date=start_date,
                end_date=end_date,
            )
            if history.empty:
                print(f"Warning: No data found for stock code {symbol}")
            else:
                # Index by real datetimes so the per-stock series align on
                # trading date when combined into one frame.
                history['日期'] = pd.to_datetime(history['日期'])
                closes_by_code[symbol] = history.set_index('日期')['收盘']
        except Exception as e:
            # Best effort: one failing symbol must not abort the others.
            print(f"Error fetching data for stock code {symbol}: {e}")
    return pd.DataFrame(closes_by_code)

# 定义计算均值、协方差矩阵和年化收益率的函数
def calculate_statistics(returns, periods_per_year=252):
    """
    Annualize the mean returns and covariance matrix of periodic asset returns.

    Both statistics are scaled linearly by `periods_per_year`.

    Args:
        returns (pandas.DataFrame): Periodic returns, one column per asset.
        periods_per_year (int | float): Number of return periods in one year,
            used as the annualization factor. Defaults to 252 (trading days),
            which suits daily returns; pass e.g. 52 for weekly or 12 for
            monthly returns.

    Returns:
        tuple: A pair `(mean_returns, cov_matrix)`:
            - mean_returns (pandas.Series): Annualized mean return per asset.
            - cov_matrix (pandas.DataFrame): Annualized covariance matrix.
    """
    mean_returns = returns.mean() * periods_per_year
    cov_matrix = returns.cov() * periods_per_year
    return mean_returns, cov_matrix

# 定义计算组合收益率和风险的函数
def portfolio_performance(weights, mean_returns, cov_matrix):
    """
    Compute the expected return and standard deviation of a weighted portfolio.

    Args:
        weights (array-like): Portfolio weight of each asset. Lists and tuples
            are accepted as well as NumPy arrays.
        mean_returns (array-like): Mean return of each asset, in the same
            order as `weights`.
        cov_matrix (array-like): Covariance matrix of the asset returns, with
            rows/columns in the same order as `weights`.

    Returns:
        tuple: `(portfolio_return, portfolio_std)` where the return is the
        weighted sum of `mean_returns` and the standard deviation is
        sqrt(w' * cov * w).
    """
    # Coerce so that plain sequences work too; the original `.T` access
    # required an ndarray.
    weights = np.asarray(weights, dtype=float)
    portfolio_return = np.sum(mean_returns * weights)
    portfolio_variance = np.dot(weights.T, np.dot(cov_matrix, weights))
    portfolio_std = np.sqrt(portfolio_variance)
    return portfolio_return, portfolio_std

# 定义最小化风险的目标函数
def minimize_risk(weights, mean_returns, cov_matrix):
    """
    Objective function for the optimizer: the portfolio's standard deviation.

    Args:
        weights: Candidate portfolio weights.
        mean_returns: Mean return per asset (forwarded unchanged; the expected
            return computed from it is discarded here).
        cov_matrix: Covariance matrix of the asset returns.

    Returns:
        The standard deviation reported by `portfolio_performance`.
    """
    _, volatility = portfolio_performance(weights, mean_returns, cov_matrix)
    return volatility

# 定义优化函数
def optimize_portfolio(mean_returns, cov_matrix, num_assets):
    """
    Search for the weights that minimize `minimize_risk` using SLSQP.

    The search starts from equal weights and is constrained so that every
    weight lies in [0, 1] and all weights sum to 1.

    Args:
        mean_returns: Mean return per asset, forwarded to the objective.
        cov_matrix: Covariance matrix of the asset returns, forwarded to the
            objective.
        num_assets (int): Number of assets, i.e. the length of the weight
            vector.

    Returns:
        The result object returned by `scipy.optimize.minimize`; the
        optimized weights are in its `x` attribute.
    """
    def weights_sum_to_one(w):
        # Equality constraint: satisfied when this evaluates to zero.
        return np.sum(w) - 1

    equal_share = 1. / num_assets
    starting_weights = np.array([equal_share] * num_assets)
    weight_bounds = ((0, 1),) * num_assets
    budget_constraint = {'type': 'eq', 'fun': weights_sum_to_one}

    return minimize(
        minimize_risk,
        starting_weights,
        args=(mean_returns, cov_matrix),
        method='SLSQP',
        bounds=weight_bounds,
        constraints=budget_constraint,
    )

# 主函数
def main():
    """
    Fetch prices for a fixed set of stocks and print the minimum-risk portfolio.

    Steps: download closing prices, compute daily returns, annualize the mean
    and covariance, run the optimizer, then print the weights together with
    the portfolio's annualized return and standard deviation. Exits early
    (after printing a message) when no usable data is available.
    """
    # Bare stock codes, without the "sh"/"sz" exchange prefix.
    stock_codes = ['600519', '000858', '601318', '002594']
    start_date = '20240101'
    end_date = '20240523'

    stock_data = get_stock_data(stock_codes, start_date, end_date)

    if stock_data.empty:
        print("No valid stock data obtained. Exiting...")
        return

    # Simple daily return: (close_t - close_{t-1}) / close_{t-1}, which is
    # algebraically identical to close_t / close_{t-1} - 1.
    returns = stock_data.pct_change()

    print("收盘价数据:")
    print(stock_data.tail())
    print("自动计算的日收益率(%):")
    print(returns.tail() * 100)

    returns = returns.dropna()

    # A sample covariance needs at least two observations; with fewer the
    # matrix is all-NaN and the optimizer output would be meaningless.
    if len(returns) < 2:
        print("Not enough return observations to estimate statistics. Exiting...")
        return

    mean_returns, cov_matrix = calculate_statistics(returns)

    # Use the columns that were actually fetched rather than the requested
    # list: get_stock_data skips codes that fail, so the two can differ, and
    # the weight vector must match the covariance matrix dimension.
    asset_codes = list(returns.columns)
    num_assets = len(asset_codes)

    optimal_result = optimize_portfolio(mean_returns, cov_matrix, num_assets)
    if not optimal_result.success:
        print(f"Warning: optimization did not converge: {optimal_result.message}")

    optimal_weights = optimal_result.x
    optimal_returns, optimal_std = portfolio_performance(optimal_weights, mean_returns, cov_matrix)

    print("最优权重:")
    for code, weight in zip(asset_codes, optimal_weights):
        print(f"{code}: {weight:.4f}")
    print(f"最优组合年化收益率: {optimal_returns:.4f}")
    print(f"最优组合年化风险(标准差): {optimal_std:.4f}")

if __name__ == "__main__":
    main()

收盘价数据:
600519 000858 601318 002594
日期
2024-05-17 1715.00 156.41 45.20 219.59
2024-05-20 1709.00 157.30 45.46 222.87
2024-05-21 1705.00 156.54 45.40 218.78
2024-05-22 1697.71 155.12 45.40 216.92
2024-05-23 1692.01 153.60 44.78 214.95
自动计算的日收益率(%):
600519 000858 601318 002594
日期
2024-05-17 0.616016 1.531970 4.994193 0.586322
2024-05-20 -0.349854 0.569017 0.575221 1.493693
2024-05-21 -0.234055 -0.483153 -0.131984 -1.835151
2024-05-22 -0.427566 -0.907116 0.000000 -0.850169
2024-05-23 -0.335746 -0.979887 -1.365639 -0.908169
最优权重:
600519: 0.7324
000858: 0.0000
601318: 0.2205
002594: 0.0472
最优组合年化收益率: 0.1205
最优组合年化风险(标准差): 0.1801