标题:When Learning Joins Edge: Real-time Proportional Computation Offloading via Deep Reinforcement Learning
会议:ICPADS 2019
一、梳理
问题:在进行任务卸载决策时,现有方法往往忽略了任务具体的卸载比例。
模型:针对上述问题,我们提出了一种基于强化学习(RL)的方法来解决按比例计算卸载问题。我们考虑一种常见的卸载场景:带宽随时间变化、设备异构,并且设备不断生成应用程序(任务)。对于每个应用程序,客户端必须选择在本地还是远程执行,并确定要卸载的比例。我们将该问题建模为一个长期优化问题,然后提出一种基于RL的算法来求解。基本思想是估计每个可能决策的收益,并选择收益最大的决策。我们没有直接采用原始的深度Q网络(DQN),而是在其基础上加入优先级缓冲机制和专家缓冲机制,提出了Advanced DQN(ADQN),二者分别用于提高样本利用率和克服冷启动问题。
目的:最小化时延和能耗的加权和(优化变量:用户的卸载策略、卸载比例)。
算法:ADQN。
二、模型细节与算法
2.1系统模型
计算卸载主要包含: 本地计算和边缘计算
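1、本地计算:用户 $i$ 在时隙 $t$ 的本地时延(计算时延和排队时延)和能耗为(此处按下文代码的实现给出,代码中未显式建模排队时延)
$$T^{l}_{i,t} = \frac{(1-p_{i,t})\,c_{i,t}}{f^{l}_{i,t}},\qquad E^{l}_{i,t} = \kappa_{l}\,(1-p_{i,t})\,c_{i,t}\,\bigl(f^{l}_{i,t}\bigr)^{2}$$
其中 $c_{i,t}$ 表示任务大小(以所需 CPU 周期数计),$1-p_{i,t}$ 表示本地计算任务的比例,$f^{l}_{i,t}$ 为本地可用计算能力,$\kappa_{l}$ 为本地能耗系数。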
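2、边缘计算:包含时延(上下行传输时延+执行时延+排队时延)和能耗(此处按下文代码的实现给出,代码中未显式建模排队时延)
$$T^{tr}_{i,t} = \frac{p_{i,t}\,d_{i,t}}{r^{up}_{i,t}} + \frac{\eta\,p_{i,t}\,d_{i,t}}{r^{down}_{i,t}},\qquad T^{e}_{i,t} = \frac{p_{i,t}\,c_{i,t}}{f^{e}_{t}}$$
$$E^{tr}_{i,t} = e^{tr}\,T^{tr}_{i,t},\qquad E^{e}_{i,t} = \kappa_{e}\,p_{i,t}\,c_{i,t}\,\bigl(f^{e}_{t}\bigr)^{2}$$
其中 $p_{i,t}$ 表示卸载到边缘服务器的比例,$\eta$ 表示每单位计算任务处理后得到的结果大小,$d_{i,t}$ 为任务数据量,$c_{i,t}$ 为所需 CPU 周期数,$r^{up}_{i,t}$、$r^{down}_{i,t}$ 为上、下行速率,$f^{e}_{t}$ 为所选边缘服务器的可用计算能力,$e^{tr}$ 为单位时间传输能耗,$\kappa_{e}$ 为边缘能耗系数。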
3、总的时延与能耗成本
2.2优化目标
2.3 State、Action和Reward
1、状态空间:
状态空间包含:(数据大小、本地CPU频率,local可用计算资源,上行速率,下行速率,ES可用计算资源)
2、动作空间:
动作空间包含:(用户选择ES的策略,任务卸载比例)
3、奖励:
奖励:下文代码中奖励取部分卸载总成本的相反数,即 $r = -C^{off}$(对应 `reward = -total_cost`)。
其中 $C^{local}$ 表示本地计算成本,$C^{off}$ 表示部分卸载产生的成本。
2.4代码
计算卸载环境代码:
import math
import copy
import numpy as np
class ENV():
    """Simulated edge-offloading environment with several users and servers.

    Each user equipment (UE) holds one task per step and chooses a target
    MEC server plus the fraction of the task to offload. The environment
    scores that choice with a weighted sum of delay and energy and also
    evaluates three baselines (local only, MEC only, random offloading).

    An observation for one UE is a flat vector laid out as
    ``[task_size, task_cpu_cycle, local_comp,
      server_cap * MECs, uplink * MECs, downlink * MECs]``,
    i.e. ``n_features = 3 + 3 * MECs`` entries.
    """

    def __init__(self, UEs=3, MECs=7, k=33, lam=0.5):
        """Build the action table and draw the base system parameters.

        :param UEs: number of user equipments.
        :param MECs: number of edge servers.
        :param k: number of discrete offload ratios, evenly spaced in [0, 1].
        :param lam: weight of the time cost; energy is weighted ``1 - lam``.
        """
        self.UEs = UEs
        self.MECs = MECs
        self.k = k

        # Action table: row ``s * k + j`` is (server index s, j-th ratio).
        server_col = np.repeat(np.arange(MECs, dtype=float), k)
        ratio_col = np.tile(np.linspace(0, 1, k), MECs)
        self.actions = np.column_stack((server_col, ratio_col))  # (MECs*k, 2)
        self.n_actions = len(self.actions)
        self.n_features = 3 + MECs * 3
        # Scales the downlink term: result size per unit of offloaded data.
        self.discount = 0.01

        # Frequency units and normalisation factors.
        self.Hz = 1
        self.kHz = 1000 * self.Hz
        self.mHz = 1000 * self.kHz
        self.GHz = 1000 * self.mHz
        self.nor = 10 ** (-7)
        self.nor1 = 10 ** 19

        # Data-size units.
        self.bit = 1
        self.B = 8 * self.bit
        self.KB = 1024 * self.B
        self.MB = 1024 * self.KB

        # Base computing capacity of UEs and MEC servers (normalised).
        self.UE_f = self._randint(1.5 * self.GHz * self.nor,
                                  2 * self.GHz * self.nor)
        self.MEC_f = self._randint(5 * self.GHz * self.nor,
                                   7 * self.GHz * self.nor)
        self.tr_energy = 1  # transmission energy per unit of time
        # Base transmission rate (normalised).
        self.r = 40 * math.log2(1 + (16 * 10)) * self.MB * self.nor
        # Energy coefficients for edge (ew) and local (lw) computation.
        self.ew, self.lw = 10 ** (-26), 3 * 10 ** (-26)
        self.et, self.lt = 1, 1

        # Bounds the drifting quantities are clamped to in ``step``.
        self.local_core_max = 1.3 * self.UE_f
        self.local_core_min = 0.7 * self.UE_f
        self.server_core_max = 1.3 * self.MEC_f
        self.server_core_min = 0.7 * self.MEC_f
        self.uplink_max, self.uplink_min = 1.3 * self.r, 0.7 * self.r
        self.downlink_max, self.downlink_min = 1.3 * self.r, 0.7 * self.r

        self.lam = lam
        self.e = 1

    @staticmethod
    def _randint(low, high):
        """Draw an integer from [int(low), int(high)).

        The bounds in this class are often floats; truncating them here
        makes explicit the conversion ``np.random.randint`` was relied on
        to perform.
        """
        return np.random.randint(int(low), int(high))

    @staticmethod
    def _drift(values, max_step, lower, upper, grow):
        """Return a drifted copy of ``values``.

        Each entry moves by an independent integer step from
        ``[0, int(max_step))``: upwards (capped at ``upper``) when ``grow``
        is true, otherwise downwards (floored at ``lower``).
        """
        values = np.asarray(values, dtype=float)
        delta = np.random.randint(0, int(max_step), size=values.shape)
        if grow:
            return np.minimum(values + delta, upper)
        return np.maximum(values - delta, lower)

    def _offload_cost(self, task_size, task_cpu_cycle, local_comp,
                      server_cap, up_rate, down_rate, ratio):
        """Return ``(time_cost, energy_cost, total_cost)`` for one offload.

        ``ratio`` of the task goes to the server; the rest runs locally in
        parallel, so the computation delay is the slower of the two parts.
        """
        sent = ratio * task_size
        tr_time = sent / up_rate + self.discount * sent / down_rate
        tr_energy = (self.tr_energy * sent / up_rate
                     + self.discount * self.tr_energy * sent / down_rate)

        local_time = task_cpu_cycle * (1 - ratio) / local_comp
        local_energy = (self.lw * task_cpu_cycle * (1 - ratio)
                        * local_comp ** 2)

        mec_time = ratio * task_cpu_cycle / server_cap
        mec_energy = self.ew * ratio * task_cpu_cycle * server_cap ** 2

        time_cost = (max(local_time, mec_time) + tr_time) * self.et
        energy_cost = (tr_energy + local_energy + mec_energy) * self.e
        total_cost = self.lam * time_cost + (1 - self.lam) * energy_cost
        return time_cost, energy_cost, total_cost

    def reset(self):
        """Draw a fresh initial observation for every UE.

        Server capacities are drawn once and shared by all UEs; task
        parameters, local capacity and link rates are drawn per UE.

        :return: list of ``UEs`` observation vectors.
        """
        obs = []
        servers_cap = []
        for ue in range(self.UEs):
            task_size = self._randint(1.5 * self.mHz, 2 * self.mHz)
            task_cpu_cycle = self._randint(10 ** 3, 10 ** 5)
            local_comp = self._randint(0.9 * self.UE_f, 1.1 * self.UE_f)

            uplink, downlink = [], []
            for _ in range(self.MECs):
                uplink.append(self._randint(0.9 * self.r, 1.1 * self.r))
                downlink.append(self._randint(0.9 * self.r, 1.1 * self.r))
                if ue == 0:
                    # Capacities are a property of the servers, so they are
                    # drawn while handling the first UE and then reused.
                    servers_cap.append(
                        self._randint(0.9 * self.MEC_f, 1.1 * self.MEC_f))

            head = np.array([task_size, task_cpu_cycle, local_comp])
            obs.append(np.hstack((head, servers_cap, uplink, downlink)))
        return obs

    def choose_action(self, prob):
        """Sample one action per UE from per-UE action probabilities.

        :param prob: for each UE, a probability vector of length
            ``MECs * k`` over the flattened (server, ratio) actions.
        :return: list of ``[target_server, offload_ratio]`` per UE.
        """
        ratios = np.linspace(0, 1, self.k)
        actions = []
        for ue in range(self.UEs):
            flat = np.random.choice(a=(self.MECs * self.k), p=prob[ue])
            server, slot = divmod(int(flat), self.k)
            actions.append([server, ratios[slot]])
        return actions

    def step(self, observation, actions_prob, is_prob=True, is_compared=True):
        """Score the chosen actions and produce the next observations.

        :param observation: list of per-UE observation vectors. It is
            only read; the arrays are never modified.
        :param actions_prob: per-UE action probabilities when ``is_prob``
            is true, otherwise a list of ``[target_server, ratio]``.
        :param is_prob: whether ``actions_prob`` must be sampled first.
        :param is_compared: whether to return the baseline statistics.
        :return: if ``is_compared``: ``(obs_, rew, local, mec, ran,
            dpg_times, local_times, mec_times, ran_times, dpg_energys,
            local_energys, mec_energys, ran_energys, total)``; otherwise
            ``(obs_, rew, dpg_times, dpg_energys)``. Every element is a
            list with one entry per UE.
        """
        actions = self.choose_action(actions_prob) if is_prob else actions_prob
        n = self.MECs
        last_ue = self.UEs - 1

        rew, local, ran, mec = [], [], [], []
        dpg_times, local_times, ran_times, mec_times = [], [], [], []
        dpg_energys, local_energys, ran_energys, mec_energys = [], [], [], []
        total = []

        pending = []      # per-UE (head, uplink, downlink) of the next state
        next_caps = None  # next server capacities, shared by all UEs

        for i in range(self.UEs):
            state = observation[i]
            task_size, task_cpu_cycle, local_comp = state[0], state[1], state[2]
            # Copy the slices: slicing an ndarray yields views, and the drift
            # below must not write back into the caller's state.
            servers_cap = np.array(state[3:3 + n], dtype=float)
            uplink = np.array(state[3 + n:3 + 2 * n], dtype=float)
            downlink = np.array(state[3 + 2 * n:3 + 3 * n], dtype=float)

            target_server, percen = int(actions[i][0]), actions[i][1]

            # 1) Partial offloading with the chosen server and ratio.
            time_cost, energy_cost, total_cost = self._offload_cost(
                task_size, task_cpu_cycle, local_comp,
                servers_cap[target_server], uplink[target_server],
                downlink[target_server], percen)

            # 2) Baseline: everything computed locally.
            local_only_time = task_cpu_cycle / local_comp * self.et
            local_only_energy = (self.lw * task_cpu_cycle
                                 * local_comp ** 2 * self.e)
            local_only = (self.lam * local_only_time
                          + (1 - self.lam) * local_only_energy)

            # 3) Baseline: everything offloaded to the chosen server.
            mec_only_time_cost, mec_only_energy_cost, mec_only = \
                self._offload_cost(
                    task_size, task_cpu_cycle, local_comp,
                    servers_cap[target_server], uplink[target_server],
                    downlink[target_server], 1.0)

            # 4) Baseline: random ratio on a random server.
            percen_ran = np.random.uniform()
            mec_ran = np.random.randint(self.MECs)
            random_time_cost, random_energy_cost, random_total = \
                self._offload_cost(
                    task_size, task_cpu_cycle, local_comp,
                    servers_cap[mec_ran], uplink[mec_ran],
                    downlink[mec_ran], percen_ran)

            reward = -total_cost

            # Next state: with probability 0.5 resources grow, else shrink.
            grow = np.random.uniform() > 0.5
            if grow:
                local_comp = min(
                    local_comp + self._randint(0, 0.2 * self.UE_f),
                    self.local_core_max)
            else:
                local_comp = max(
                    local_comp + self._randint(-0.2 * self.UE_f, 0),
                    self.local_core_min)
            uplink = self._drift(uplink, 0.2 * self.r,
                                 self.uplink_min, self.uplink_max, grow)
            downlink = self._drift(downlink, 0.2 * self.r,
                                   self.downlink_min, self.downlink_max, grow)
            if i == last_ue:
                # Server capacity is shared, so it is redrawn once (using
                # the last UE's direction) and applied to every UE below.
                # NOTE(review): the step size is scaled by UE_f, as in the
                # original code; confirm MEC_f was not intended.
                next_caps = self._drift(
                    servers_cap, 0.3 * self.UE_f,
                    self.server_core_min, self.server_core_max, grow)

            # NOTE(review): reset() draws task_size from
            # [1.5 * mHz, 2 * mHz) but the next task here is in [10, 50);
            # kept as in the original, confirm the intended scale.
            task_size = self._randint(10, 50)
            task_cpu_cycle = self._randint(10 ** 3, 10 ** 5)
            head = np.array([task_size, task_cpu_cycle, local_comp])
            pending.append((head, uplink, downlink))

            rew.append(reward)
            local.append(local_only)
            mec.append(mec_only)
            ran.append(random_total)
            dpg_times.append(time_cost)
            local_times.append(local_only_time)
            mec_times.append(mec_only_time_cost)
            ran_times.append(random_time_cost)
            dpg_energys.append(energy_cost)
            local_energys.append(local_only_energy)
            mec_energys.append(mec_only_energy_cost)
            ran_energys.append(random_energy_cost)
            total.append(total_cost)

        obs_ = [np.hstack((head, next_caps, up, down))
                for head, up, down in pending]

        if is_compared:
            return (obs_, rew, local, mec, ran, dpg_times, local_times,
                    mec_times, ran_times, dpg_energys, local_energys,
                    mec_energys, ran_energys, total)
        return obs_, rew, dpg_times, dpg_energys