Model-Free TD Control: Sarsa

news2025/1/11 20:51:37

import time
import random
# Compared with Q-learning, this variant tends to score somewhat worse.
class Env:
    """Cliff-walking grid world.

    The agent position is kept as ``x`` (row index, ``0 .. height-1``) and
    ``y`` (column index, ``0 .. length-1``).  Row 0 contains the start cell
    (column 0), the cliff cells (columns ``1 .. length-2``) and the terminal
    cell (last column); every other row is free ground.
    """

    def __init__(self, length, height):
        """Create a ``height`` x ``length`` grid with the agent on the start cell."""
        # Map size: number of columns (length) and number of rows (height).
        self.length = length
        self.height = height
        # Agent position: row x, column y.  The start cell is (0, 0).
        self.x = 0
        self.y = 0

    def render(self, frames=50):
        """Print the grid to the terminal and sleep ``1 / frames`` seconds.

        'S' is the start, 'T' the terminal cell, 'x' the cliff, '.' free
        ground and 'o' the agent (which overwrites whatever cell it is on).
        """
        for row in range(self.height):
            if row == 0:
                # Row 0 is the cliff line "Sxxx...xT".
                cells = ['S'] + ['x'] * (self.length - 2) + ['T']
            else:
                cells = ['.'] * self.length
            if self.x == row:
                cells[self.y] = 'o'
            print(''.join(cells))
        # ANSI "cursor up": jump back over the rows just printed (plus the
        # newline emitted by this print) so the next frame overwrites them.
        print('\033[' + str(self.height + 1) + 'A')
        time.sleep(1.0 / frames)

    def step(self, action):
        """Apply ``action`` and return ``(reward, [x, y], terminal)``.

        Actions index the delta table below:
        0: column + 1, 1: column - 1, 2: row - 1, 3: row + 1.
        As drawn by ``render`` (row 0 printed first) that is
        0: right, 1: left, 2: up (towards the cliff row), 3: down.
        Moves that would leave the grid are clamped to the border.

        Every step costs -1.  Landing on a cliff cell ends the episode with
        reward -100; landing on the terminal cell ends it with reward -1.
        """
        change = [[0, 1], [0, -1], [-1, 0], [1, 0]]
        d_row, d_col = change[action]
        self.x = min(self.height - 1, max(0, self.x + d_row))
        self.y = min(self.length - 1, max(0, self.y + d_col))

        states = [self.x, self.y]
        reward = -1  # default per-step reward
        terminal = False
        # On row 0 anything right of the start cell ends the episode.
        if self.x == 0 and self.y > 0:
            terminal = True
            if self.y != self.length - 1:
                reward = -100  # fell off the cliff
        return reward, states, terminal

    def reset(self):
        """Put the agent back on the start cell and return that state ``[x, y]``."""
        self.x = 0
        self.y = 0
        return [self.x, self.y]

class Q_table():
    """Tabular action-value function Q(s, a) with epsilon-greedy helpers.

    Values are stored in one flat list laid out action-major: all cells for
    action 0 first, then action 1, and so on (see ``_index``).
    """

    def __init__(self, length, height, actions=4, alpha=0.1, gamma=0.9):
        """Create a zero-initialised table.

        length, height: grid size (columns, rows).
        actions: number of discrete actions.
        alpha: learning rate used by ``update``.
        gamma: discount factor used by ``update``.
        """
        self.table = [0] * actions * length * height  # all Q(s, a) start at zero
        self.actions = actions
        self.length = length
        self.height = height
        self.alpha = alpha
        self.gamma = gamma

    def _index(self, a, x, y):
        """Return the index of Q([x, y], a) in the flat table."""
        return a * self.height * self.length + x * self.length + y

    def _epsilon(self):
        """Return the exploration probability (a tunable constant).

        A decaying schedule such as ``20. / (num_episode + 100)`` (0.2 at the
        start, 0.05 after 300 episodes, tending to 0) is an alternative for
        better convergence; it would need the episode number passed in.
        """
        return 0.1

    def _random_action(self):
        """Draw a uniformly random action index in ``range(self.actions)``."""
        # Scaling one random() draw keeps the random stream identical to the
        # previous hard-coded ``* 4`` when actions == 4.
        return int(random.random() * self.actions)

    def _action_values(self, x, y):
        """Return ``[Q([x, y], a) for each action a]``."""
        return [self.table[self._index(a, x, y)] for a in range(self.actions)]

    def take_action(self, x, y, num_episode):
        """Choose an action for state [x, y] epsilon-greedily.

        ``num_episode`` is accepted for callers but not used by the constant
        epsilon schedule.  Ties between greedy actions go to the lowest index.
        """
        if random.random() < self._epsilon():
            return self._random_action()
        actions_value = self._action_values(x, y)
        return actions_value.index(max(actions_value))

    def epsilon_q(self, x, y):
        """Return the Q-value of an epsilon-greedily sampled action in [x, y].

        With probability about ``1 - epsilon`` this is the maximum action
        value; otherwise it is the value of a uniformly random action.  The
        sample is drawn here and is independent of any ``take_action`` call.
        """
        actions_value = self._action_values(x, y)
        if random.random() > self._epsilon():
            return max(actions_value)
        return actions_value[self._random_action()]

    def update(self, a, s0, s1, r, is_terminated):
        """Move Q(s0, a) towards the TD target by a step of size ``alpha``.

        s0, s1: states of the form [x, y] (before and after the step).
        r: reward received for the transition.
        is_terminated: if true the target is just ``r``; otherwise it is
        ``r + gamma * epsilon_q(s1)``.
        """
        idx = self._index(a, s0[0], s0[1])
        q_predict = self.table[idx]
        if not is_terminated:
            q_target = r + self.gamma * self.epsilon_q(s1[0], s1[1])
        else:
            q_target = r
        self.table[idx] += self.alpha * (q_target - q_predict)

def cliff_walk(num_episodes=5000, log_every=1, length=12, height=4, render=False):
    """Train a tabular agent on the cliff-walking grid and return the scores.

    num_episodes: number of episodes to run.
    log_every: print the score of every ``log_every``-th episode
        (1 prints every episode; 0 or None disables printing).
    length, height: grid size passed to both the environment and the table.
    render: if true, draw the grid after every step.

    Returns a list with the total reward of each episode.
    """
    env = Env(length=length, height=height)
    table = Q_table(length=length, height=height)
    scores = []
    for num_episode in range(num_episodes):
        # Start every episode from the environment's own start state.
        env.reset()
        s0 = [env.x, env.y]
        episodic_reward = 0
        is_terminated = False
        while not is_terminated:
            # One transition: act, observe, learn, advance.
            action = table.take_action(s0[0], s0[1], num_episode)
            r, s1, is_terminated = env.step(action)
            table.update(action, s0, s1, r, is_terminated)
            episodic_reward += r
            if render:
                env.render(frames=100)
            s0 = s1
        if log_every and num_episode % log_every == 0:
            print("Episode: {}, Score: {}".format(num_episode, episodic_reward))
        scores.append(episodic_reward)
    return scores

# Script entry point: run the full training loop and print the episode scores.
cliff_walk()

 Episode: 0, Score: -100
Episode: 20, Score: -147
Episode: 40, Score: -48
Episode: 60, Score: -131
Episode: 80, Score: -54
Episode: 100, Score: -63
Episode: 120, Score: -39
Episode: 140, Score: -100
Episode: 160, Score: -38
Episode: 180, Score: -31
Episode: 200, Score: -28
Episode: 220, Score: -25
Episode: 240, Score: -17
Episode: 260, Score: -26
Episode: 280, Score: -103
Episode: 300, Score: -17
Episode: 320, Score: -100
Episode: 340, Score: -17
Episode: 360, Score: -21
Episode: 380, Score: -23
Episode: 400, Score: -19
Episode: 420, Score: -24
Episode: 440, Score: -23
Episode: 460, Score: -100
Episode: 480, Score: -16
Episode: 500, Score: -17
Episode: 520, Score: -28
Episode: 540, Score: -15
Episode: 560, Score: -15
Episode: 580, Score: -17
Episode: 600, Score: -100
Episode: 620, Score: -19
Episode: 640, Score: -19
Episode: 660, Score: -102
Episode: 680, Score: -17
Episode: 700, Score: -16
Episode: 720, Score: -17
Episode: 740, Score: -19
Episode: 760, Score: -115
Episode: 780, Score: -15
Episode: 800, Score: -17
Episode: 820, Score: -16
Episode: 840, Score: -15
Episode: 860, Score: -15
Episode: 880, Score: -17
Episode: 900, Score: -17
Episode: 920, Score: -19
Episode: 940, Score: -17
Episode: 960, Score: -18
Episode: 980, Score: -23
Episode: 1000, Score: -19
Episode: 1020, Score: -18
Episode: 1040, Score: -17
Episode: 1060, Score: -20
Episode: 1080, Score: -17
Episode: 1100, Score: -17
Episode: 1120, Score: -19
Episode: 1140, Score: -21
Episode: 1160, Score: -24
Episode: 1180, Score: -20
Episode: 1200, Score: -21
Episode: 1220, Score: -19
Episode: 1240, Score: -19
Episode: 1260, Score: -17
Episode: 1280, Score: -23
Episode: 1300, Score: -17
Episode: 1320, Score: -15
Episode: 1340, Score: -15
Episode: 1360, Score: -15
Episode: 1380, Score: -20
Episode: 1400, Score: -19
Episode: 1420, Score: -17
Episode: 1440, Score: -15
Episode: 1460, Score: -17
Episode: 1480, Score: -15
Episode: 1500, Score: -15
Episode: 1520, Score: -15
Episode: 1540, Score: -15
Episode: 1560, Score: -18
Episode: 1580, Score: -17
Episode: 1600, Score: -15
Episode: 1620, Score: -20
Episode: 1640, Score: -17
Episode: 1660, Score: -117
Episode: 1680, Score: -21
Episode: 1700, Score: -21
Episode: 1720, Score: -22
Episode: 1740, Score: -18
Episode: 1760, Score: -19
Episode: 1780, Score: -17
Episode: 1800, Score: -19
Episode: 1820, Score: -19
Episode: 1840, Score: -17
Episode: 1860, Score: -20
Episode: 1880, Score: -17
Episode: 1900, Score: -21
Episode: 1920, Score: -17
Episode: 1940, Score: -17
Episode: 1960, Score: -15
Episode: 1980, Score: -17
Episode: 2000, Score: -15
Episode: 2020, Score: -19
Episode: 2040, Score: -17
Episode: 2060, Score: -19
Episode: 2080, Score: -18
Episode: 2100, Score: -17
Episode: 2120, Score: -18
Episode: 2140, Score: -18
Episode: 2160, Score: -17
Episode: 2180, Score: -21
Episode: 2200, Score: -20
Episode: 2220, Score: -21
Episode: 2240, Score: -18
Episode: 2260, Score: -17
Episode: 2280, Score: -17
Episode: 2300, Score: -18
Episode: 2320, Score: -18
Episode: 2340, Score: -17
Episode: 2360, Score: -17
Episode: 2380, Score: -19
Episode: 2400, Score: -18
Episode: 2420, Score: -100
Episode: 2440, Score: -19
Episode: 2460, Score: -23
Episode: 2480, Score: -19
Episode: 2500, Score: -19
Episode: 2520, Score: -18
Episode: 2540, Score: -18
Episode: 2560, Score: -19
Episode: 2580, Score: -21
Episode: 2600, Score: -18
Episode: 2620, Score: -21
Episode: 2640, Score: -20
Episode: 2660, Score: -17
Episode: 2680, Score: -19
Episode: 2700, Score: -18
Episode: 2720, Score: -19
Episode: 2740, Score: -22
Episode: 2760, Score: -19
Episode: 2780, Score: -22
Episode: 2800, Score: -17
Episode: 2820, Score: -17
Episode: 2840, Score: -18
Episode: 2860, Score: -17
Episode: 2880, Score: -21
Episode: 2900, Score: -21
Episode: 2920, Score: -17
Episode: 2940, Score: -18
Episode: 2960, Score: -17
Episode: 2980, Score: -19
Episode: 3000, Score: -18
Episode: 3020, Score: -17
Episode: 3040, Score: -17
Episode: 3060, Score: -21
Episode: 3080, Score: -15
Episode: 3100, Score: -19
Episode: 3120, Score: -17
Episode: 3140, Score: -17
Episode: 3160, Score: -17
Episode: 3180, Score: -17
Episode: 3200, Score: -17
Episode: 3220, Score: -18
Episode: 3240, Score: -19
Episode: 3260, Score: -19
Episode: 3280, Score: -17
Episode: 3300, Score: -18
Episode: 3320, Score: -17
Episode: 3340, Score: -25
Episode: 3360, Score: -18
Episode: 3380, Score: -17
Episode: 3400, Score: -19
Episode: 3420, Score: -17
Episode: 3440, Score: -15
Episode: 3460, Score: -118
Episode: 3480, Score: -17
Episode: 3500, Score: -15
Episode: 3520, Score: -17
Episode: 3540, Score: -19
Episode: 3560, Score: -21
Episode: 3580, Score: -17
Episode: 3600, Score: -17
Episode: 3620, Score: -17
Episode: 3640, Score: -19
Episode: 3660, Score: -15
Episode: 3680, Score: -15
Episode: 3700, Score: -100
Episode: 3720, Score: -17
Episode: 3740, Score: -17
Episode: 3760, Score: -100
Episode: 3780, Score: -100
Episode: 3800, Score: -17
Episode: 3820, Score: -18
Episode: 3840, Score: -19
Episode: 3860, Score: -17
Episode: 3880, Score: -19
Episode: 3900, Score: -19
Episode: 3920, Score: -19
Episode: 3940, Score: -18
Episode: 3960, Score: -18
Episode: 3980, Score: -15
Episode: 4000, Score: -19
Episode: 4020, Score: -17
Episode: 4040, Score: -20
Episode: 4060, Score: -19
Episode: 4080, Score: -17
Episode: 4100, Score: -19
Episode: 4120, Score: -15
Episode: 4140, Score: -22
Episode: 4160, Score: -17
Episode: 4180, Score: -22
Episode: 4200, Score: -18
Episode: 4220, Score: -18
Episode: 4240, Score: -19
Episode: 4260, Score: -100
Episode: 4280, Score: -17
Episode: 4300, Score: -19
Episode: 4320, Score: -17
Episode: 4340, Score: -19
Episode: 4360, Score: -21
Episode: 4380, Score: -22
Episode: 4400, Score: -21
Episode: 4420, Score: -18
Episode: 4440, Score: -22
Episode: 4460, Score: -17
Episode: 4480, Score: -20
Episode: 4500, Score: -17
Episode: 4520, Score: -17
Episode: 4540, Score: -17
Episode: 4560, Score: -19
Episode: 4580, Score: -17
Episode: 4600, Score: -19
Episode: 4620, Score: -24
Episode: 4640, Score: -18
Episode: 4660, Score: -17
Episode: 4680, Score: -17
Episode: 4700, Score: -19
Episode: 4720, Score: -15
Episode: 4740, Score: -17
Episode: 4760, Score: -19
Episode: 4780, Score: -17
Episode: 4800, Score: -19
Episode: 4820, Score: -19
Episode: 4840, Score: -21
Episode: 4860, Score: -19
Episode: 4880, Score: -18
Episode: 4900, Score: -17
Episode: 4920, Score: -20
Episode: 4940, Score: -17
Episode: 4960, Score: -17
Episode: 4980, Score: -17

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/564380.html

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!

相关文章

二、go语言的编码规范

编码规范 一、 命名规范 Go在命名时以字母A到Z或a到z或下划线开头,后面跟着零或更多的字母、下划线和数字(0到9)。Go不允许在命名中使用@、$和%等标点符号。Go是一种区分大小写的编程语言。因此,Manpower和manpower是两个不同的命名。 当命名…

华为OD机试真题 Java 实现【组装新的数组】【2023Q1 200分】

一、题目描述 给你一个整数M和数组N,N中的元素为连续整数,要求根据N中的元素组装成新的数组R。 组装规则: R中元素总和加起来等于M;R中的元素可以从N中重复选取;R中的元素最多只能有1个不在N中,且比N中的数字都要小…

如何设置工业设备的振动监测阈值

工业设备的振动阈值设置是确保设备正常运行和及时维护的关键步骤。本文将介绍一些常见的方法和策略,帮助您正确设置工业设备的振动阈值。 1. ISO 10816 振动烈度表格: ISO 10816 是一项国际标准,提供了设备振动水平的参考值。该标准将设备按…

【SpringMVC源码三千问】@RequestMapping和RequestCondition

@RequestMapping 是 SpringMVC 中最常用的定义请求映射关系的注解。 下面我们来分析一下它的源码。 @RequestMapping 先看下 @RequestMapping 的定义: @Target({ElementType.TYPE, ElementType.METHOD}) @Retention(RetentionPolicy.RUNTIME) @Documented @Mapping publ…

day4 - 使用图像绘制动态时钟

本期的主要内容是利用OpenCV中包含的绘图函数,例如绘制线段、绘制矩形、绘制圆形等来绘制一个动态时钟的表盘。 完成本期内容,你可以: 掌握OpenCV常见的绘图函数 学会使用绘图函数绘制简单的图像 若要运行案例代码,你需要有…

linux——搭建NTP服务器

1、设置服务器时区 (使用外部NTP时间源可不设置) 在Linux系统中设置时区可以使用以下命令: 查看当前时区: timedatectl 列出所有可用时区: timedatectl list-timezones 设置时区: sudo timedatectl set-…

一、尚医通手机登录

文章目录 一、登录需求1、登录效果2、登录需求 二、登录1&#xff0c;搭建service-user模块1.1 搭建service-user模块1.2 修改配置1.3 启动类1.4 配置网关 2、添加用户基础类2.1 添加model2.2 添加Mapper2.3 添加service接口及实现类2.4 添加controller 3、登录api接口3.1 添加…

OpenAI再出新作,AIGC时代,3D建模师的饭碗危险了!

大家好,我是千与千寻,也可以叫我千寻哥,说起来,自从ChatGPT发布之后,我就开始焦虑,担心自己程序员的饭碗会不会哪天就被AIGC取代了。 有人说我是过度焦虑了,但是我总觉得有点危机感肯定没有坏处。…

分布式事务解决方案-Seata

分布式事务解决方案-Seata 1.分布式事务问题1.1.本地事务1.2.分布式事务1.3.演示分布式事务问题 2.理论基础2.1.CAP定理2.1.1.一致性2.1.2.可用性2.1.3.分区容错2.1.4.矛盾 2.2.BASE理论2.3.解决分布式事务的思路 3.初识Seata3.1.Seata的架构3.2.部署TC服务3.3.微服务集成Seata…

C++ string类 迭代器 范围for

string类 在C语言当中,也有字符串,它是以 " \0 " 结尾 的 一些字符的集合,在C的标准库当中还有一些 用于操作 str 类型的库函数,但是,这些函数的功能不是很全面,而且这些操作函数和 str 类型是分…

B2B企业需要什么样的客户体验管理?销售易出手了

导读&#xff1a;如何将类似B2C领域的私域体验延展到B2B领域&#xff1f; “不愿在顾客上花时间带来的结果只有一个&#xff0c;那就是让客户转而寻找值得他们花时间的消费体验。”2012年问世的《体验经济》一书&#xff0c;一语道破客户体验的重要性。 过去&#xff0c;提到体…

PG安装使用walminer插件教程

一、下载源码 https://gitee.com/movead/XLogMiner/tree/walminer_3.0_stable/ 二、编译安装插件 克隆下载源码后&#xff0c;将walminer目录放进pg下的contrib目录中 cd /home/postgres/postgresql-15.3/contrib/将walminer源码目录放进此路径下&#xff0c;进入walminer目…

卷麻了,公司新来的00后测试用例写的比我还好,简直无地自容......

经常看到无论是刚入职场的新人&#xff0c;还是工作了一段时间的老人&#xff0c;都会对编写测试用例感到困扰&#xff1f;例如&#xff1a; 如何编写测试用例&#xff1f; 作为一个测试新人&#xff0c;刚开始接触测试&#xff0c;对于怎么写测试用例很是头疼&#xff0c;无法…

STM32寄存器映射

1. 寄存器基本原理 寄存器是单片机内部一种特殊的内存&#xff0c;可以实现对单片机各个功能的控制&#xff0c;我们编写程序最终就是去控制寄存器 下面的举例平台为STM32F407ZG 1.1 STM32寄存器分类 大类小类说明 内核寄存器 内核相关寄存器 包含R0~R15、xPSR、特殊功能寄…

《幸福关系的7段旅程》

关于作者 本书作者安德鲁∙马歇尔&#xff0c;英国顶尖婚姻咨询机构RELATE的资深专家&#xff0c;拥有 30年丰富的咨询经验&#xff0c;并为《泰晤士报》《观察家》和《星期日快报》撰写专栏文章。已出版19部作品&#xff0c;并被翻译成20种语言。 关于本书 《幸福关系的7段…

SQL查询比较慢,如何进行排查?如何进行SQL优化?

目录 一、开启慢查询日志 二、SQL优化 三、总结 一、开启慢查询日志 SQL慢查询是指执行时间较长的SQL语句,可能导致系统性能下降和响应时间延长。通过以下步骤可以开启慢查询日志记录: #查询是否开启慢查询日志 slow_query_log显示ON说明已开启…

广和通携手有人物联网完成5G SUL辅助上行功能验证

近日&#xff0c;广和通5G模组FM650-CN已在商用网络中实现5G SUL上行能力增强&#xff0c;助力有人物联网工业路由器在仿真网络环境中完成SUL辅助上行功能的验证。本次验证成功&#xff0c;意味着FM650-CN已具备SUL辅助上行商用能力&#xff0c;有利于推动更多5G终端支持SUL特性…

es 7 Es分布式基础

目录 复杂特性es已经做了分片副本负载均衡实现 设置分片数副本数 双机器读写 自动横向扩容 Node 节点宕机主节点切换 数据路由 增删改操作 读操作 _bulk 复杂特性es已经做了分片副本负载均衡实现 1.每个索引包含多个分片 设置分片数副本数 双机器读写 自动横向扩容 No…

React | React的CSS方式

✨ 个人主页&#xff1a;CoderHing &#x1f5a5;️ React.js专栏&#xff1a;React的CSS方式 &#x1f64b;‍♂️ 个人简介&#xff1a;一个不甘平庸的平凡人&#x1f36c; &#x1f4ab; 系列专栏&#xff1a;吊打面试官系列 16天学会Vue 11天学会React Node专栏 &#x…

Axure教程—多色折线图(中继器)

本文将教大家如何用AXURE中的中继器制作多色折线图 一、效果介绍 如图&#xff1a; 预览地址&#xff1a;https://xpdm3g.axshare.com 下载地址&#xff1a;https://download.csdn.net/download/weixin_43516258/87814320 二、功能介绍 简单填写中继器内容即可动态显示值样…