编写Python函数
import re
def structured_pro(original_text, keyword, out_unit=None):
    """Extract the duration that accompanies a keyword in free text.

    The text is split into clauses on the Chinese full stop and on commas
    (ASCII and full-width).  The first clause that matches ``keyword`` and
    also contains a duration such as ``6天``, ``1+月``, ``1.5年`` or ``半年``
    supplies the result; later clauses are not examined.

    Conversions use fixed approximations: 1 year = 365 days = 52 weeks =
    12 months, and 1 month = 30 days = 4.3 weeks.  ``日`` is treated as a
    synonym of ``天``.

    Args:
        original_text: Text to search.  ``None`` yields ``None``.
        keyword: Regular expression, searched case-insensitively, that
            identifies the clause of interest.  ``None`` yields ``None``.
        out_unit: ``None`` to get the matched duration text unchanged, or
            one of ``'天'``, ``'周'``, ``'月'``, ``'年'`` to get the duration
            converted to that unit.

    Returns:
        The matched duration text (``str``) when ``out_unit`` is ``None``,
        the converted duration (``float``) otherwise, or ``None`` when no
        clause contains both the keyword and a duration.

    Raises:
        KeyError: If ``out_unit`` is not one of the supported values.
    """
    # source unit -> target unit -> (multiplier, divisor).  Keeping the two
    # factors separate reproduces the historical float results exactly
    # (for example days -> years is ``value / 365``, not ``value * (1/365)``).
    conversions = {
        '年': {'天': (365, 1), '周': (52, 1), '月': (12, 1), '年': (1, 1)},
        '月': {'天': (30, 1), '周': (4.3, 1), '月': (1, 1), '年': (1, 12)},
        '周': {'天': (7, 1), '周': (1, 1), '月': (1, 4.3), '年': (1, 52)},
        '天': {'天': (1, 1), '周': (1, 7), '月': (1, 30), '年': (1, 365)},
    }
    # Reject an unsupported unit up front so the failure does not depend on
    # whether the text happens to contain a match.
    if out_unit is not None and out_unit not in conversions['天']:
        raise KeyError(out_unit)
    if original_text is None or keyword is None:
        return None

    clause_splitter = re.compile('[。,,]')
    keyword_pattern = re.compile(keyword, re.I | re.M)
    # group(1): amount ("半" or a number, optionally with a decimal part)
    # group(2): optional "more than / about" modifier
    # group(3): unit
    duration_pattern = re.compile(
        r'(半|[0-9]+(?:\.[0-9]+)?)([+余-]?)([年月周天日])'
    )

    for clause in clause_splitter.split(original_text):
        if not keyword_pattern.search(clause):
            continue
        duration = duration_pattern.search(clause)
        if duration is None:
            # Keyword present but no duration here: keep looking.
            continue
        if out_unit is None:
            return duration.group()
        amount_text = duration.group(1)
        unit = duration.group(3)
        amount = 0.5 if amount_text == '半' else float(amount_text)
        source_unit = '天' if unit == '日' else unit
        multiplier, divisor = conversions[source_unit][out_unit]
        return amount * multiplier / divisor
    return None
Python函数调用效果
original_text = '间断咳嗽、咳痰6天,加重1+月,发热8天。'

# Without an output unit the matched duration text is returned unchanged.
structured_pro(original_text, keyword='咳嗽')
# -> '6天'

# With an output unit the duration is converted to that unit as a float.
structured_pro(original_text, keyword='咳嗽', out_unit='天')
# -> 6.0
structured_pro(original_text, keyword='咳嗽', out_unit='年')
# -> 0.01643835616438356
重构为PostgreSQL函数
-- pgsql_structured_pro: extract the duration that accompanies a keyword in
-- free text (PL/Python port of structured_pro).
--   original_text  text to search; NULL yields NULL
--   keyword        regular expression identifying the clause of interest;
--                  NULL yields NULL
--   out_unit       NULL for the matched duration text, or one of
--                  '天', '周', '月', '年' for the converted value
-- The result type is text, so converted values come back in text form and
-- the caller casts them when a number is needed.
CREATE OR REPLACE FUNCTION pgsql_structured_pro(original_text text, keyword text, out_unit text DEFAULT NULL)
RETURNS text
AS $$
def structured_pro(original_text, keyword, out_unit):
    """Return the duration found in the first clause matching ``keyword``.

    The text is split into clauses on the Chinese full stop and on commas
    (ASCII and full-width).  The first clause that matches ``keyword`` and
    contains a duration such as ``6天``, ``1+月``, ``1.5年`` or ``半年``
    supplies the result.  Conversions use 1 year = 365 days = 52 weeks =
    12 months and 1 month = 30 days = 4.3 weeks; ``日`` is treated as ``天``.

    Returns the matched text when ``out_unit`` is None, a float converted to
    ``out_unit`` otherwise, or None when nothing matches.  Raises KeyError
    for an unsupported ``out_unit``.
    """
    import re

    # source unit -> target unit -> (multiplier, divisor).  Keeping the two
    # factors separate reproduces the historical float results exactly.
    conversions = {
        '年': {'天': (365, 1), '周': (52, 1), '月': (12, 1), '年': (1, 1)},
        '月': {'天': (30, 1), '周': (4.3, 1), '月': (1, 1), '年': (1, 12)},
        '周': {'天': (7, 1), '周': (1, 1), '月': (1, 4.3), '年': (1, 52)},
        '天': {'天': (1, 1), '周': (1, 7), '月': (1, 30), '年': (1, 365)},
    }
    if out_unit is not None and out_unit not in conversions['天']:
        raise KeyError(out_unit)
    # SQL NULL arrives as None; answer NULL instead of aborting the query.
    if original_text is None or keyword is None:
        return None

    clause_splitter = re.compile('[。,,]')
    keyword_pattern = re.compile(keyword, re.I | re.M)
    # group(1): amount, group(2): optional modifier, group(3): unit
    duration_pattern = re.compile(
        r'(半|[0-9]+(?:\.[0-9]+)?)([+余-]?)([年月周天日])'
    )

    for clause in clause_splitter.split(original_text):
        if not keyword_pattern.search(clause):
            continue
        duration = duration_pattern.search(clause)
        if duration is None:
            # Keyword present but no duration here: keep looking.
            continue
        if out_unit is None:
            return duration.group()
        amount_text = duration.group(1)
        unit = duration.group(3)
        amount = 0.5 if amount_text == '半' else float(amount_text)
        source_unit = '天' if unit == '日' else unit
        multiplier, divisor = conversions[source_unit][out_unit]
        return amount * multiplier / divisor
    return None

return structured_pro(original_text, keyword, out_unit)
$$ LANGUAGE plpython3u;
函数调用
-- For every row, extract the duration attached to '咳嗽' in the
-- "入院记录主诉" column: once as the matched text, once converted to days.
SELECT
    "就诊编号",
    "入院记录主诉",
    pgsql_structured_pro("入院记录主诉", '咳嗽', NULL) AS "咳嗽",
    CAST(pgsql_structured_pro("入院记录主诉", '咳嗽', '天') AS FLOAT) AS "天"
FROM "支气管扩张_2入院记录"