2026/4/17 1:50:52
网站建设
项目流程
给境外合法网站做数据,企业网站建设既可以外包给专业的网站建设服务公司,网站留言板有什么用,wordpress顶部菜单函数功能说明
本代码实现了一个基于强化学习（RL）和长短期记忆网络（LSTM）的量化交易策略。该策略通过LSTM模型对历史价格数据进行特征提取，再利用强化学习算法（如DQN或PPO）训练智能体，使其…

## 功能说明

本代码实现了一个基于强化学习（RL）和长短期记忆网络（LSTM）的量化交易策略。该策略通过LSTM模型对历史价格数据进行特征提取，再利用强化学习算法（如DQN或PPO）训练智能体，使其能够根据市场状态做出买卖决策。核心在于设计合理的奖励函数，将交易信号与市场反馈有效关联，从而优化策略的收益风险比。该策略适用于股票、期货等金融时间序列数据的自动化交易，但需注意其对历史数据的依赖性和潜在的过拟合风险。

## 作用与风险分析

作用：

- 自适应学习：LSTM能捕捉价格序列中的非线性关系，强化学习使策略具备动态调整能力
- 风险控制：通过奖励函数设计可自然融入止损止盈逻辑
- 多维度决策：可整合量价指标、波动率等多维特征作为输入
- 策略迭代：支持在线学习机制，持续适应市场变化

风险：

- 数据依赖性：策略性能高度依赖历史数据的质量和完整性
- 参数敏感性：奖励函数权重、LSTM超参数等设置不当可能导致策略失效
- 黑箱特性：深度学习模型缺乏可解释性，难以进行归因分析
- 市场突变：极端行情下可能产生异常交易行为
- 计算成本：需要GPU加速训练，实时交易存在延迟风险

## 系统架构设计

1. 数据预处理模块

```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


class DataPreprocessor:
    def __init__(self, lookback_window=60):
        self.lookback_window = lookback_window
        self.scaler = MinMaxScaler(feature_range=(-1, 1))

    def prepare_data(self, df, features=['close', 'volume']):
        """处理原始数据并生成标准化特征矩阵"""
        # 计算技术指标
        df = self._add_technical_indicators(df)
        # 选择目标特征
        data = df[features].values
        # 数据标准化
        scaled_data = self.scaler.fit_transform(data)
        # 创建滑动窗口样本
        X, y = [], []
        for i in range(len(scaled_data) - self.lookback_window):
            X.append(scaled_data[i:i + self.lookback_window])
            y.append(scaled_data[i + self.lookback_window, 0])  # 预测收盘价
        return np.array(X), np.array(y)

    def _add_technical_indicators(self, df):
        """添加常用技术指标"""
        # RSI
        delta = df['close'].diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        df['rsi'] = 100 - (100 / (1 + (gain / loss)))
        # MACD
        exp1 = df['close'].ewm(span=12, adjust=False).mean()
        exp2 = df['close'].ewm(span=26, adjust=False).mean()
        df['macd'] = exp1 - exp2
        df['signal_line'] = df['macd'].ewm(span=9, adjust=False).mean()
        # 成交量加权均价
        df['vwap'] = (df['close'] * df['volume']).cumsum() / df['volume'].cumsum()
        return df.dropna()
```

2. 
LSTM特征编码器

```python
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization


class LSTMFeatureEncoder:
    def __init__(self, input_shape, units=128, dropout_rate=0.2):
        self.model = self._build_model(input_shape, units, dropout_rate)

    def _build_model(self, input_shape, units, dropout_rate):
        """构建LSTM特征提取网络"""
        model = Sequential([
            LSTM(units, return_sequences=True, input_shape=input_shape),
            BatchNormalization(),
            Dropout(dropout_rate),
            LSTM(units // 2, return_sequences=False),
            BatchNormalization(),
            Dropout(dropout_rate),
            Dense(units // 4, activation='relu'),
            Dense(1, activation='linear')  # 输出潜在价格趋势
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def train(self, X_train, y_train, epochs=50, batch_size=32):
        """训练LSTM编码器"""
        early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
        self.model.fit(X_train, y_train, validation_split=0.1, epochs=epochs,
                       batch_size=batch_size, callbacks=[early_stop], verbose=0)

    def extract_features(self, X):
        """获取LSTM编码后的特征表示"""
        return self.model.predict(X, verbose=0)
```

3. 强化学习环境实现

```python
import gym
from gym import spaces
import numpy as np


class TradingEnv(gym.Env):
    def __init__(self, price_data, initial_balance=10000, transaction_cost=0.001):
        super(TradingEnv, self).__init__()
        # 动作空间：-1卖出，0持有，1买入
        self.action_space = spaces.Discrete(3)
        # 观测空间：包含价格、RSI、MACD等特征
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(price_data.shape[1] + 3,))
        self.price_data = price_data
        self.initial_balance = initial_balance
        self.transaction_cost = transaction_cost
        self.reset()

    def reset(self):
        self.current_step = 0
        self.portfolio_value = self.initial_balance
        self.cash = self.initial_balance
        self.shares = 0
        self.max_drawdown = 0
        self.trade_history = []
        return self._get_obs()

    def _get_obs(self):
        """获取当前市场状态和投资组合状态"""
        market_state = self.price_data[self.current_step]
        portfolio_state = [self.cash, self.shares, self.portfolio_value]
        return np.concatenate([market_state, portfolio_state])

    def step(self, action):
        # 执行交易操作
        prev_cash = self.cash
        prev_shares = self.shares
        if action == 1:  # 买入
            buy_amount = min(self.cash, self.portfolio_value * 0.1)
            self.shares += buy_amount / (self.price_data[self.current_step, 0] * (1 + self.transaction_cost))
            self.cash -= buy_amount
        elif action == -1:  # 卖出
            sell_amount = min(self.shares * self.price_data[self.current_step, 0], self.portfolio_value * 0.1)
            self.shares -= sell_amount / (self.price_data[self.current_step, 0] * (1 - self.transaction_cost))
            self.cash += sell_amount
        # 更新组合价值
        self.portfolio_value = self.cash + self.shares * self.price_data[self.current_step, 0]
        # 计算最大回撤
        peak = max(self.portfolio_value, self._get_peak())
        current_drawdown = (peak - self.portfolio_value) / peak
        self.max_drawdown = max(self.max_drawdown, current_drawdown)
        # 记录交易历史
        self.trade_history.append({
            'step': self.current_step,
            'action': action,
            'price': self.price_data[self.current_step, 0],
            'portfolio_value': self.portfolio_value
        })
        # 判断是否终止
        done = self.current_step >= len(self.price_data) - 1
        # 计算奖励
        reward = self._calculate_reward(prev_cash, prev_shares, action)
        self.current_step += 1
        return self._get_obs(), reward, done, {'max_drawdown': self.max_drawdown}

    def _calculate_reward(self, prev_cash, prev_shares, action):
        """设计多维度奖励函数"""
        # 收益奖励
        profit_reward = (self.portfolio_value - self.initial_balance) / self.initial_balance
        # 风险惩罚
        risk_penalty = self.max_drawdown
        # 交易成本惩罚
        transaction_penalty = abs(action) * self.transaction_cost
        # 夏普比率调整项
        sharpe_ratio = self._calculate_sharpe_ratio()
        # 综合奖励
        total_reward = profit_reward - risk_penalty - transaction_penalty + sharpe_ratio * 0.1
        return total_reward

    def _calculate_sharpe_ratio(self, risk_free_rate=0.02, periods=252):
        """计算年化夏普比率"""
        returns = []
        for i in range(1, len(self.trade_history)):
            prev_value = self.trade_history[i - 1]['portfolio_value']
            curr_value = self.trade_history[i]['portfolio_value']
            returns.append((curr_value - prev_value) / prev_value)
        if len(returns) < 2:
            return 0
        mean_return = np.mean(returns) * periods
        std_return = np.std(returns) * np.sqrt(periods)
        sharpe_ratio = (mean_return - risk_free_rate) / std_return if std_return != 0 else 0
        return sharpe_ratio

    def _get_peak(self):
        """获取历史最高点"""
        return max(trade['portfolio_value'] for trade in self.trade_history)
```

## 奖励函数设计原则

1. 
多维度平衡机制

有效的奖励函数应同时考虑以下要素：

- 收益因子：绝对收益（final_value - initial_value）
- 风险因子：最大回撤、波动率、VaR
- 效率因子：夏普比率、信息比率
- 成本因子：交易频率、滑点损耗
- 稳定性因子：收益分布的标准差

2. 动态权重调整

```python
class DynamicRewardScheduler:
    def __init__(self, base_weights={'profit': 0.4, 'risk': 0.3, 'efficiency': 0.2, 'cost': 0.1}):
        self.base_weights = base_weights
        self.current_weights = base_weights.copy()

    def update_weights(self, training_progress, market_volatility):
        """根据训练进度和市场波动动态调整权重"""
        # 随着训练深入，逐渐增加风险控制的权重
        progress_factor = min(training_progress / 100, 1.0)
        self.current_weights['risk'] = self.base_weights['risk'] * (1 + progress_factor)
        self.current_weights['profit'] = self.base_weights['profit'] * (1 - progress_factor / 2)
        # 根据市场波动调整效率权重
        volatility_factor = np.clip(market_volatility / 0.2, 0.5, 2.0)
        self.current_weights['efficiency'] *= volatility_factor
        # 确保所有权重之和为1
        total = sum(self.current_weights.values())
        for key in self.current_weights:
            self.current_weights[key] /= total
```

3. 惩罚机制设计

| 违规类型 | 惩罚方式 | 数学表达 |
| --- | --- | --- |
| 过度交易 | 线性递增惩罚 | penalty = k * num_trades |
| 持仓集中度过高 | 二次惩罚 | penalty = c * position_concentration² |
| 违反止损规则 | 固定比例扣除 | penalty = stop_loss_violation * portfolio_value |
| 流动性不足 | 冲击成本模拟 | penalty = slippage * order_size |

## 入参关联机制

1. 技术指标与LSTM输入的映射

| 技术指标 | 物理意义 | LSTM输入维度 | 归一化范围 |
| --- | --- | --- | --- |
| 收盘价序列 | 价格趋势 | 60维向量 | [-1, 1] |
| RSI | 超买超卖 | 1维标量 | [0, 1] |
| MACD柱状图 | 动量变化 | 1维标量 | [-2, 2] |
| 成交量 | 市场活跃度 | 1维标量 | [0, 1] |
| VWAP | 平均成本 | 1维标量 | [0, 1] |

2. 强化学习状态空间构建

```python
def create_state_space(price_data, technical_indicators, portfolio_state):
    """构建融合市场数据和投资组合的状态向量"""
    # 市场部分：最近60个时间步的价格序列
    market_window = price_data[-60:]  # 假设已按时间顺序排列
    # 技术指标快照
    indicator_snapshot = np.array([
        technical_indicators['rsi'],
        technical_indicators['macd'],
        technical_indicators['volume']
    ])
    # 投资组合状态
    portfolio_vector = np.array([
        portfolio_state['cash'],
        portfolio_state['shares'],
        portfolio_state['portfolio_value']
    ])
    # 拼接所有组件
    state_vector = np.concatenate([
        market_window.flatten(),
        indicator_snapshot,
        portfolio_vector
    ])
    return state_vector.astype(np.float32)
```

3. 动作空间离散化策略

| 动作类型 | 含义 | 适用场景 | 仓位管理建议 |
| --- | --- | --- | --- |
| -1 | 清仓 | 预期下跌 | 保留≥70%现金 |
| 0 | 观望 | 不确定性高 | 维持现状 |
| 1 | 满仓 | 强烈看涨 | 使用≤30%杠杆 |
| 2 | 半仓 | 温和上涨 | 保持灵活性 |
| 3 | 对冲 | 高风险环境 | 配置反向ETF |

## 完整策略实现

1. 主程序框架

```python
def main():
    # 1. 数据加载与预处理
    data_path = 'daily_stock_data.csv'
    df = pd.read_csv(data_path)
    preprocessor = DataPreprocessor(lookback_window=60)
    X, y = preprocessor.prepare_data(df)
    # 2. 训练LSTM特征编码器
    lstm_encoder = LSTMFeatureEncoder(input_shape=(60, len(features)))
    lstm_encoder.train(X[:int(0.8 * len(X))], y[:int(0.8 * len(X))])
    # 提取测试集特征
    test_features = lstm_encoder.extract_features(X[int(0.8 * len(X)):])
    # 3. 初始化交易环境
    price_data = test_features[:, :, 0]  # 取第一个特征作为价格序列
    env = TradingEnv(price_data)
    # 4. 配置强化学习算法（以PPO为例）
    from stable_baselines3 import PPO
    model = PPO('MlpPolicy', env, verbose=1, learning_rate=3e-4, n_steps=2048,
                batch_size=64, ent_coef=0.0, tensorboard_log='./ppo_tensorboard/')
    # 5. 训练智能体
    model.learn(total_timesteps=100000, log_interval=10)
    # 6. 回测与评估
    obs = env.reset()
    done = False
    while not done:
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        print(f"Action:{action}, Portfolio Value:{info['current_portfolio_value']:.2f}")
    # 7. 保存模型
    model.save('trading_agent.zip')


if __name__ == '__main__':
    main()
```

2. 关键参数调优表

| 参数类别 | 推荐范围 | 典型值 | 影响方向 |
| --- | --- | --- | --- |
| LSTM单元数 | 64-256 | 128 | ↑复杂度/↓速度 |
| Dropout率 | 0.1-0.3 | 0.2 | ↑泛化/↓拟合 |
| 折扣因子γ | 0.9-0.99 | 0.95 | ↑长期视野 |
| 探索率ε | 0.01-0.1 | 0.05 | ↑探索/↓稳定 |
| 批量大小 | 32-128 | 64 | ↑并行/↓内存 |
| 学习率 | 1e-4-1e-3 | 3e-4 | ↑收敛/↓震荡 |