Solving the FJSP with DDQN


Code for the flexible scheduling problem: using DDQN (Double Deep Q-Network) to solve the flexible job shop scheduling problem (FJSP).
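
For reference, what makes this Double DQN rather than plain DQN is the target used for the temporal-difference update: the main network chooses the greedy next action and the target network evaluates it, i.e. y = r + gamma * Q_target(s', argmax_a Q_main(s', a)), which reduces the Q-value over-estimation of standard DQN. Below is a minimal, self-contained sketch of that target computation (the function name and arguments are illustrative, not taken from the repository):

import torch

def double_dqn_target(reward, next_state, gamma, q_main, q_target):
    # Select the greedy next action with the main network,
    # then evaluate that action with the target network.
    with torch.no_grad():
        greedy_action = q_main(next_state).argmax(dim=1, keepdim=True)
        next_q = q_target(next_state).gather(1, greedy_action).squeeze(1)
    return reward + gamma * next_q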

Iteration curves

Reward-function iteration curve (figure)


Makespan iteration curve (figure)


The code!!

The full project is in my git repository: DFJSP_Share

Main body of the DDQN algorithm

import csv
import os
from environment2.Environment import Environment
import torch
from environment1.CompositeDispatchingRules import Composite_rules
from collections import namedtuple
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

from Gantt.gantt import Gantt2
from data.InstanceGenerator import Instance_Generator
import pandas as pd
import copy
import random
import numpy as np
from Drawline.draw import drawLine
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_EPISODES = 100
Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))

np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


class ReplayMemory:
    '''Fixed-capacity circular buffer of transitions for experience replay.'''

    def __init__(self, CAPACITY):
        self.capacity = CAPACITY
        self.memory = []
        self.index = 0

    def push(self, state, action, state_next, reward):
        '''Store a transition (state, action, state_next, reward), overwriting the oldest entry when the buffer is full.'''

        if len(self.memory) < self.capacity:
            self.memory.append(None)

        self.memory[self.index] = Transition(state, action, state_next, reward)

        self.index = (self.index + 1) % self.capacity

    def sample(self, batch_size):
        '''Randomly sample batch_size transitions from the buffer.'''
        return random.sample(self.memory, batch_size)

    def __len__(self):
        '''Return the number of transitions currently stored.'''
        return len(self.memory)


class Net(nn.Module):
    '''Q-network: a fully connected MLP mapping state features to one Q-value per action.'''

    def __init__(self, input_dims, num_actions):
        super(Net, self).__init__()
        # self.chkpt_dir = chkpt_dir
        # self.checkpoint_file = os.path.join(self.chkpt_dir, name)
        self.fc1 = nn.Linear(input_dims, 30)
        self.fc2 = nn.Linear(30, 30)
        self.fc3 = nn.Linear(30, 30)
        self.fc4 = nn.Linear(30, 30)
        self.fc5 = nn.Linear(30, 30)
        self.fc6 = nn.Linear(30, num_actions)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        output = self.fc6(x)
        return output

class Brain:
    '''Holds the main/target Q-networks, the replay memory, epsilon-greedy action selection and the Double DQN update.'''
    def __init__(self, inputdims, num_actions, CAPACITY, learing_rate, BATCH_SIZE, GAMMA, e_greedy_increment):
        self.num_actions = num_actions
        self.memory = ReplayMemory(CAPACITY)
        self.main_q_network = Net(inputdims, self.num_actions).to(device)
        self.target_q_network = Net(inputdims, self.num_actions).to(device)
        # print(self.main_q_network)
        self.optimizer = optim.Adagrad(self.main_q_network.parameters(), lr=learing_rate)
        self.loss_history = []
        self.BatchSize = BATCH_SIZE
        self.gamma = GAMMA
        self.epsilon = 0.5 if e_greedy_increment is not None else 0.1
        self.e_greedy_increment = e_greedy_increment

    def replay(self, step):
        '''Experience Replay'''
        if len(self.memory) < self.BatchSize:
            return

        self.batch, self.state_batch, self.action_batch, self.reward_batch, self.non_final_next_states = self.make_minibatch()
        self.expected_state_action_values = self.get_expected_state_action_values()

        self.update_main_q_network()
        if step % 500 == 0:  # sync the target network every 500 steps
            self.update_target_q_network()

    def decide_action(self, state):

        # epsilon = 0.5 * (1 / (episode + 1))
        # epsilon = (1 / episode) if episode > 0.1 else 0.1
        # Linearly anneal epsilon down to a floor of 0.1.
        self.epsilon = self.epsilon - self.e_greedy_increment if self.epsilon > 0.1 else 0.1
        if self.epsilon <= np.random.uniform(0, 1):
            # Exploit: greedy action from the main network (no gradient tracking needed).
            self.main_q_network.eval()
            with torch.no_grad():
                action = self.main_q_network(state).max(1)[1].view(1, 1).to(device)
        else:
            # Explore: uniformly random action.
            action = torch.LongTensor([[random.randrange(self.num_actions)]]).to(device)
        return action

    def make_minibatch(self):
        transitions = self.memory.sample(self.BatchSize)
        batch = Transition(*zip(*transitions))
        state_batch = torch.cat(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)
        non_final_next_states = torch.cat([s for s in batch.next_state
                                           if s is not None])

        return batch, state_batch, action_batch, reward_batch, non_final_next_states

    def get_expected_state_action_values(self):

        self.main_q_network.eval()
        self.target_q_network.eval()

        # Q(s, a) of the actions that were actually taken, from the main network.
        self.state_action_values = self.main_q_network(self.state_batch).gather(1, self.action_batch)

        # Mask marking transitions whose next state is not terminal.
        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, self.batch.next_state)), dtype=torch.bool).to(device)

        next_state_values = torch.zeros(self.BatchSize).to(device)

        a_m = torch.zeros(self.BatchSize).type(torch.LongTensor).to(device)

        # Double DQN: the main network selects the greedy next action ...
        a_m[non_final_mask] = self.main_q_network(self.non_final_next_states).detach().max(1)[1]

        a_m_non_final_next_states = a_m[non_final_mask].view(-1, 1)

        # ... and the target network evaluates that action.
        next_state_values[non_final_mask] = self.target_q_network(self.non_final_next_states).gather(1, a_m_non_final_next_states).detach().squeeze()

        # TD target: r + gamma * Q_target(s', argmax_a Q_main(s', a)), with 0 for terminal next states.
        expected_state_action_values = self.reward_batch + self.gamma * next_state_values

        return expected_state_action_values

    def update_main_q_network(self):
        self.main_q_network.train()

        loss = F.mse_loss(self.state_action_values.float(), self.expected_state_action_values.unsqueeze(1).float())
        self.loss_history.append(loss.item())

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def update_target_q_network(self):
        '''Copy the main Q-network weights into the target Q-network.'''
        self.target_q_network.load_state_dict(self.main_q_network.state_dict())  # copying the state_dict directly; no need to save the model to disk
        # self.target_q_network.load_checkpoint()


class Agent:
    '''Thin wrapper that forwards replay, action selection and memorization calls to the Brain.'''
    def __init__(self, inputdims, num_actions, CAPACITY, learing_rate, BATCH_SIZE, GAMMA, e_greedy_increment):

        self.brain = Brain(inputdims, num_actions, CAPACITY, learing_rate, BATCH_SIZE, GAMMA, e_greedy_increment)

    def update_q_function(self, step):

        self.brain.replay(step)

    def get_action(self, state, episode):

        action = self.brain.decide_action(state)
        return action

    def memorize(self, state, action, state_next, reward):

        self.brain.memory.push(state, action, state_next, reward)

    def update_target_q_function(self):

        self.brain.update_target_q_network()


class DDQN_main:
    '''Training driver: generates a dynamic FJSP instance each episode and trains the DDQN agent on it.'''
    def __init__(self, Capacity, Learning_rate, Batch_size, GAMMA, e_greedy_increment=0.0001, input_dims=7, num_actions=6):
        ProcessTime, A1, D1, M_num, Oij_list, Oij_dict, O_num, J_num = Instance_Generator(Initial_Job_num=2, M_num=4,
                                                                                      E_ave=50, New_inset=3, DDT=0.5)

        self.env = Environment(J_num, M_num, O_num, ProcessTime, Oij_list, Oij_dict, D1, A1)
        self.num_actions = num_actions
        self.NUM_EPISODES = NUM_EPISODES
        self.Capacity = Capacity
        self.Batch_size = Batch_size
        self.GAMMA = GAMMA
        self.Learning_rate = Learning_rate
        self.input_dims = input_dims
        self.agent = Agent(inputdims=self.input_dims, num_actions=self.num_actions, learing_rate=self.Learning_rate,
                           CAPACITY=self.Capacity, BATCH_SIZE=self.Batch_size, GAMMA=self.GAMMA, e_greedy_increment=e_greedy_increment)

    def run(self, times=0):
        step = 0
        episode_reward, makespan_history = [], []
        save_param = float('inf')  # best (lowest) makespan seen so far
        for episode in range(NUM_EPISODES):
            # Generate a fresh dynamic FJSP instance for every episode.
            ProcessTime, A1, D1, M_num, Oij_list, Oij_dict, O_num, J_num = Instance_Generator(
                Initial_Job_num=2, M_num=4, E_ave=50, New_inset=3, DDT=0.5)

            self.env = Environment(J_num, M_num, O_num, ProcessTime, Oij_list, Oij_dict, D1, A1)
            time_collect = []
            state, done = self.env.reset()
            state = torch.from_numpy(state).type(torch.FloatTensor).to(device)
            state = torch.unsqueeze(state, 0).to(device)
            reward_sum = 0
            done_collect, a_collect, start_end_collect = [], [], []
            while not done:
                action = self.agent.get_action(state, episode)
                # Map the chosen action index to a composite dispatching rule decision.
                a = Composite_rules(action.item(), self.env)
                state_next, reward, done, start_to_end_list = self.env.step(a)
                reward_sum += reward
                state_next = torch.from_numpy(state_next).type(torch.FloatTensor).to(device)
                state_next = torch.unsqueeze(state_next, 0).to(device)
                reward = torch.as_tensor(reward)
                reward = torch.unsqueeze(reward, 0).to(device)
                self.agent.memorize(state, action, state_next, reward)
                # Experience replay; the target network is synced inside Brain.replay().
                self.agent.update_q_function(step)
                state = state_next
                step += 1
                done_collect.append(done)
                a_collect.append(a)
                start_end_collect.append(start_to_end_list)
                time_collect.append(start_to_end_list[1])
                if done:
                    episode_reward.append(reward_sum)
                    makespan_history.append(np.max(time_collect))

                    if episode % 100 == 0:
                        print('episode', episode, 'makespan %.1f' % np.max(time_collect), 'reward', reward_sum)
                    current_makespan = np.max(time_collect)
                    if save_param > current_makespan:
                        # New best makespan: save the corresponding Gantt chart.
                        save_param = current_makespan
                        Gantt2(done_collect, a_collect, start_end_collect, self.env.Job_num, self.env.Machine_num,
                               save_param, f'../gattePicture/{times}.png')
        # Plot the makespan, reward and loss curves.
        drawLine(makespan_history, episode_reward, self.agent.brain.loss_history, f'../PictureSaver/{self.Capacity,self.Batch_size,self.GAMMA,self.Learning_rate, times}.png')
        return makespan_history, episode_reward, self.agent.brain.loss_history


if __name__ == '__main__':
    # D = DDQN_main(15261, 0.000000015, 521, 0.85)
    D = DDQN_main(Capacity=3000, Learning_rate=0.000001, Batch_size=32, GAMMA=0.75)
    D.run()
    # D.run()
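
If you want to compare several hyperparameter settings, note that run() takes a times index that is used in the file names of the saved Gantt chart and learning-curve pictures, so a small sweep could look like the sketch below (the parameter values are illustrative, not tuned):

if __name__ == '__main__':
    # Illustrative sweep; each run saves its own Gantt chart and curves via the times index.
    settings = [(3000, 1e-6, 32, 0.75), (5000, 1e-5, 64, 0.90)]
    for i, (cap, lr, bs, gamma) in enumerate(settings):
        d = DDQN_main(Capacity=cap, Learning_rate=lr, Batch_size=bs, GAMMA=gamma)
        d.run(times=i)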
