import akshare as ak
import pandas as pd
import numpy as np
from scipy.optimize import minimize
# 定义获取股票数据的函数
def get_stock_data(codes, start_date, end_date):
    """
    Download daily closing prices for a list of A-share stocks via akshare.

    Args:
        codes (list): Six-digit stock code strings.
        start_date (str): First date to request, formatted 'YYYYMMDD'.
        end_date (str): Last date to request, formatted 'YYYYMMDD'.

    Returns:
        pandas.DataFrame: One column of closing prices per successfully
        fetched code, indexed by the parsed '日期' (date) column. Codes that
        return no rows, or that raise during retrieval, are reported on
        stdout and left out of the result.
    """
    closes_by_code = {}
    for code in codes:
        # The whole per-stock pipeline stays inside the try so that a failure
        # for one code (download or column handling) never aborts the others.
        try:
            history = ak.stock_zh_a_hist(
                symbol=code,
                period="daily",
                start_date=start_date,
                end_date=end_date,
            )
            if not history.empty:
                # Parse '日期' (date), use it as the index, keep '收盘' (close).
                dated = history.assign(**{'日期': pd.to_datetime(history['日期'])})
                closes_by_code[code] = dated.set_index('日期')['收盘']
            else:
                print(f"Warning: No data found for stock code {code}")
        except Exception as e:
            print(f"Error fetching data for stock code {code}: {e}")
    # Columns are aligned on the union of all dates by the DataFrame constructor.
    return pd.DataFrame(closes_by_code)
# 定义计算均值、协方差矩阵和年化收益率的函数
def calculate_statistics(returns):
    """
    Annualize the mean and covariance of a table of daily asset returns.

    Args:
        returns (pandas.DataFrame): Daily returns, one column per asset.

    Returns:
        tuple: ``(mean_returns, cov_matrix)`` where
            - mean_returns (pandas.Series) is the per-asset daily mean
              scaled by 252,
            - cov_matrix (pandas.DataFrame) is the daily covariance matrix
              scaled by 252.
    """
    # Annualization factor: the script assumes 252 trading days per year.
    trading_days = 252
    daily_mean, daily_cov = returns.mean(), returns.cov()
    return daily_mean * trading_days, daily_cov * trading_days
# 定义计算组合收益率和风险的函数
def portfolio_performance(weights, mean_returns, cov_matrix):
    """
    Compute the return and standard deviation of a weighted portfolio.

    Args:
        weights: Array of asset weights (must expose ``.T``).
        mean_returns: Per-asset mean returns, same length as ``weights``.
        cov_matrix: Asset covariance matrix.

    Returns:
        tuple: ``(returns, std)``: the weighted sum of mean returns and
        the square root of the quadratic form ``w^T * cov * w``.
    """
    # Quadratic form evaluated in two steps: cov * w first, then w^T * (cov * w).
    cov_times_w = np.dot(cov_matrix, weights)
    variance = np.dot(weights.T, cov_times_w)
    expected = np.sum(mean_returns * weights)
    return expected, np.sqrt(variance)
# 定义最小化风险的目标函数
def minimize_risk(weights, mean_returns, cov_matrix):
    """
    Objective function for the optimizer: the portfolio's standard deviation.

    Args:
        weights: Candidate asset weights.
        mean_returns: Per-asset mean returns (passed through to
            ``portfolio_performance``).
        cov_matrix: Asset covariance matrix.

    Returns:
        The second element (standard deviation) of
        ``portfolio_performance(weights, mean_returns, cov_matrix)``.
    """
    _, risk = portfolio_performance(weights, mean_returns, cov_matrix)
    return risk
# 定义优化函数
def optimize_portfolio(mean_returns, cov_matrix, num_assets):
    """
    Search for the weights that minimize portfolio standard deviation.

    The search uses SLSQP, starts from equal weights, keeps every weight in
    [0, 1], and requires the weights to sum to 1.

    Args:
        mean_returns: Per-asset mean returns (forwarded to the objective).
        cov_matrix: Asset covariance matrix (forwarded to the objective).
        num_assets (int): Number of assets, i.e. length of the weight vector.

    Returns:
        The result object returned by ``scipy.optimize.minimize``; the
        weights found are in its ``x`` attribute.
    """
    def weights_sum_to_one(x):
        # Equality constraint: satisfied when this evaluates to zero.
        return np.sum(x) - 1

    equal_share = 1. / num_assets
    start = np.array([equal_share] * num_assets)
    weight_bounds = ((0, 1),) * num_assets
    budget_constraint = {'type': 'eq', 'fun': weights_sum_to_one}

    return minimize(
        minimize_risk,
        start,
        args=(mean_returns, cov_matrix),
        method='SLSQP',
        bounds=weight_bounds,
        constraints=budget_constraint,
    )
# 主函数
def main():
    """
    Run the minimum-risk portfolio analysis end to end and print the results.

    Steps: fetch closing prices for a fixed list of stock codes and date
    range, compute daily returns, annualize their mean and covariance, find
    the minimum-standard-deviation weights, and print the weights together
    with the portfolio's annualized return and standard deviation.

    The function returns early (after printing a message) when no price data
    was obtained or when too few complete return rows remain to estimate a
    covariance matrix.
    """
    # Stock codes to analyze, without the 'sh' / 'sz' exchange prefix.
    stock_codes = ['600519', '000858', '601318', '002594']
    start_date = '20240101'
    end_date = '20240523'

    # Fetch closing prices; codes that fail to download are simply absent
    # from the returned columns.
    stock_data = get_stock_data(stock_codes, start_date, end_date)
    if stock_data.empty:
        print("No valid stock data obtained. Exiting...")
        return

    # Daily return = (close - previous close) / previous close, which is
    # algebraically the same as close / previous close - 1.
    returns = stock_data.pct_change()

    print("收盘价数据:")
    print(stock_data.tail())
    print("自动计算的日收益率(%):")
    print(returns.tail() * 100)

    # Drop rows with any missing return (at least the first row, which has
    # no previous close).
    returns = returns.dropna()
    if len(returns) < 2:
        # A sample covariance needs at least two observations.
        print("Not enough complete return observations to optimize. Exiting...")
        return

    # Annualized mean returns and covariance matrix.
    mean_returns, cov_matrix = calculate_statistics(returns)

    # Use the columns that were actually fetched, not the requested list:
    # if a code was skipped, sizing and labelling by stock_codes would
    # mismatch the statistics and mislabel the weights.
    asset_codes = list(returns.columns)
    num_assets = len(asset_codes)

    # Run the minimum-risk optimization.
    optimal_result = optimize_portfolio(mean_returns, cov_matrix, num_assets)
    if not optimal_result.success:
        # Still report the last iterate, but make the failure visible.
        print(f"Warning: optimizer did not converge: {optimal_result.message}")

    optimal_weights = optimal_result.x
    optimal_returns, optimal_std = portfolio_performance(optimal_weights, mean_returns, cov_matrix)

    print("最优权重:")
    for code, weight in zip(asset_codes, optimal_weights):
        print(f"{code}: {weight:.4f}")
    print(f"最优组合年化收益率: {optimal_returns:.4f}")
    print(f"最优组合年化风险(标准差): {optimal_std:.4f}")
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()
# Sample output from running this script:
# 收盘价数据:
# 600519 000858 601318 002594
# 日期
# 2024-05-17 1715.00 156.41 45.20 219.59
# 2024-05-20 1709.00 157.30 45.46 222.87
# 2024-05-21 1705.00 156.54 45.40 218.78
# 2024-05-22 1697.71 155.12 45.40 216.92
# 2024-05-23 1692.01 153.60 44.78 214.95
# 自动计算的日收益率(%):
# 600519 000858 601318 002594
# 日期
# 2024-05-17 0.616016 1.531970 4.994193 0.586322
# 2024-05-20 -0.349854 0.569017 0.575221 1.493693
# 2024-05-21 -0.234055 -0.483153 -0.131984 -1.835151
# 2024-05-22 -0.427566 -0.907116 0.000000 -0.850169
# 2024-05-23 -0.335746 -0.979887 -1.365639 -0.908169
# 最优权重:
# 600519: 0.7324
# 000858: 0.0000
# 601318: 0.2205
# 002594: 0.0472
# 最优组合年化收益率: 0.1205
# 最优组合年化风险(标准差): 0.1801