编写Python函数
 
import re
def structured_pro(original_text, keyword, out_unit=None):
    """Extract the duration stated alongside a keyword in free text.

    The text is split into clauses on ``。`` and ``,``.  The first clause
    that both matches ``keyword`` and contains a duration (for example
    ``6天``, ``1+月``, ``1.5年`` or ``半年``) supplies the result; later
    clauses are ignored.

    Conversions use the fixed factors 1 year = 365 days = 52 weeks =
    12 months and 1 month = 30 days = 4.3 weeks.

    Args:
        original_text: Text to search.  ``None`` yields ``None``.
        keyword: Regular expression, searched case-insensitively, that
            identifies the clause of interest.  ``None`` yields ``None``.
        out_unit: ``None`` to get the matched duration text unchanged, or
            one of ``'天'``, ``'周'``, ``'月'``, ``'年'`` to get the duration
            converted to days, weeks, months or years.

    Returns:
        The matched text (``str``) when ``out_unit`` is ``None``, the
        converted duration (``float``) otherwise, or ``None`` when no
        clause contains both the keyword and a duration.

    Raises:
        KeyError: If ``out_unit`` is not one of the supported values.
        re.error: If ``keyword`` is not a valid regular expression.
    """
    # Start from the "nothing found" answer so that an unsupported out_unit
    # still raises KeyError at the final lookup, whatever the input was.
    date_dict = dict.fromkeys((None, '天', '周', '月', '年'))

    if original_text is not None and keyword is not None:
        pattern_keyword = re.compile(keyword, re.I | re.M)
        # Group 1: "半" (one half) or a plain/decimal number.  Reading the
        #          decimal part keeps "1.5年" from being seen as "5年".
        # Group 2: optional approximation marker ("+", "余" or "-").
        # Group 3: unit; "日" is accepted as a synonym of "天".
        pattern_time = re.compile(
            r'(半|[0-9]+(?:\.[0-9]+)?)([+余-]?)([年月周天日])'
        )

        for clause in re.split('[。,]', original_text):
            if not pattern_keyword.search(clause):
                continue
            time_match = pattern_time.search(clause)
            if time_match is None:
                # Keyword present but no duration here; keep looking.
                continue

            amount = time_match.group(1)
            value = 0.5 if amount == '半' else float(amount)
            unit = time_match.group(3)

            # Unit conversion.
            if unit == '年':
                days = value * 365
                weeks = value * 52
                months = value * 12
                years = value * 1
            elif unit == '月':
                days = value * 30
                weeks = value * 4.3
                months = value * 1
                years = value / 12
            elif unit == '周':
                days = value * 7
                weeks = value * 1
                months = value / 4.3
                years = value / 52
            else:  # '天' or '日'
                days = value * 1
                weeks = value / 7
                months = value / 30
                years = value / 365

            date_dict = {
                None: time_match.group(),
                '天': days,
                '周': weeks,
                '月': months,
                '年': years,
            }
            break

    return date_dict[out_unit]
 
Python函数调用效果
 
# Sample text: three comma-separated clauses, each carrying its own duration.
original_text='间断咳嗽、咳痰6天,加重1+月,发热8天。'
# Without out_unit the matched duration text is returned as written.
structured_pro(original_text,'咳嗽')
# -> '6天'
# out_unit='天' returns the duration converted to days.
structured_pro(original_text,'咳嗽','天')
# -> 6.0
# out_unit='年' returns the same 6 days expressed in years (6 / 365).
structured_pro(original_text,'咳嗽','年')
# -> 0.01643835616438356
 
 
 
重构为PostgreSQL函数
 
-- Extracts the duration stated alongside a keyword in free text.
-- The body is PL/Python; because the function is declared RETURNS text,
-- converted (numeric) results come back as text and callers cast them.
CREATE OR REPLACE FUNCTION pgsql_structured_pro(original_text text,keyword text,out_unit text default null)
    RETURNS text
AS $$
def structured_pro(original_text, keyword, out_unit):
    """Return the duration found in the first clause matching ``keyword``.

    The text is split into clauses on the full stop and the comma.  The
    first clause that both matches ``keyword`` and contains a duration
    (a number or "half", an optional "+" / "余" / "-" marker, and a unit)
    supplies the result.

    Conversions use the fixed factors 1 year = 365 days = 52 weeks =
    12 months and 1 month = 30 days = 4.3 weeks.

    Args:
        original_text: Text to search; None (SQL NULL) yields None.
        keyword: Regular expression, searched case-insensitively;
            None (SQL NULL) yields None.
        out_unit: None for the matched text unchanged, or one of
            '天', '周', '月', '年' for days, weeks, months or years.

    Returns:
        The matched text, the converted duration as a float, or None
        when no clause contains both the keyword and a duration.

    Raises:
        KeyError: If out_unit is not one of the supported values.
    """
    import re

    # Start from the "nothing found" answer so that an unsupported out_unit
    # still raises KeyError at the final lookup, whatever the input was.
    date_dict = dict.fromkeys((None, '天', '周', '月', '年'))

    # The function is not STRICT, so NULL arguments arrive as None; return
    # NULL for them instead of aborting the query with a TypeError.
    if original_text is not None and keyword is not None:
        pattern_keyword = re.compile(keyword, re.I | re.M)
        # Group 1: "半" (one half) or a plain/decimal number.  Reading the
        #          decimal part keeps "1.5年" from being seen as "5年".
        # Group 2: optional approximation marker ("+", "余" or "-").
        # Group 3: unit; "日" is accepted as a synonym of "天".
        pattern_time = re.compile(
            r'(半|[0-9]+(?:\.[0-9]+)?)([+余-]?)([年月周天日])'
        )

        for clause in re.split('[。,]', original_text):
            if not pattern_keyword.search(clause):
                continue
            time_match = pattern_time.search(clause)
            if time_match is None:
                # Keyword present but no duration here; keep looking.
                continue

            amount = time_match.group(1)
            value = 0.5 if amount == '半' else float(amount)
            unit = time_match.group(3)

            # Unit conversion.
            if unit == '年':
                days = value * 365
                weeks = value * 52
                months = value * 12
                years = value * 1
            elif unit == '月':
                days = value * 30
                weeks = value * 4.3
                months = value * 1
                years = value / 12
            elif unit == '周':
                days = value * 7
                weeks = value * 1
                months = value / 4.3
                years = value / 52
            else:  # '天' or '日'
                days = value * 1
                weeks = value / 7
                months = value / 30
                years = value / 365

            date_dict = {
                None: time_match.group(),
                '天': days,
                '周': weeks,
                '月': months,
                '年': years,
            }
            break

    return date_dict[out_unit]

return structured_pro(original_text,keyword,out_unit)
$$ LANGUAGE plpython3u;
 
函数调用
 
-- For each row of "支气管扩张_2入院记录", return the duration attached to
-- '咳嗽' in "入院记录主诉": once as the matched text, once converted to days.
SELECT
    "就诊编号",
    "入院记录主诉",
    pgsql_structured_pro("入院记录主诉", '咳嗽', NULL) AS "咳嗽",
    -- The function returns text, so the day count is cast to a number.
    CAST(pgsql_structured_pro("入院记录主诉", '咳嗽', '天') AS FLOAT) AS "天"
FROM "支气管扩张_2入院记录"
 
