Gradio 案例——将文本文件转为词云图
- 利用 word_cloud 库,将文本文件转为词云图
- 更完整、丰富的示例项目见 GitHub 仓库 AlionSSS/wordcloud-webui(The web UI for word_cloud,一个将文本转换为词云图的工具)
界面截图
依赖安装
- 新建一个 Python 3.9.16 的虚拟环境
- 依赖
$ pip install gradio==4.29 -i "https://pypi.doubanio.com/simple/"
$ pip install wordcloud==1.9.3 -i "https://pypi.doubanio.com/simple/"
$ pip install jieba==0.42.1 -i "https://pypi.doubanio.com/simple/"
项目目录结构
wordcloud-webui # 目录
--/resources # 资源目录
--/consts.py # py文件,常量
--/gradio_interfaces.py # py文件,Gradio视图
--/jieba_util.py # py文件,工具库文件
--/lib_word_cloud.py # py文件,工具库文件
--/main.py # py文件,入口
代码(以下依次为 main.py、lib_word_cloud.py、jieba_util.py、gradio_interfaces.py 的内容)
from gradio_interfaces import iface
# Script entry point: start the Gradio app only when this file is executed
# directly, not when it is imported by another module.
if __name__ == "__main__":
    iface.launch()
from wordcloud import WordCloud, ImageColorGenerator
import numpy as np
from PIL import Image
from consts import *
def text2wordcount_normal(
    text: str,
    background_color: str = "white",
    margin=2,
    min_font_size=4,
    max_font_size=200,
    font_path=None,
    width: int = 400,
    height: int = 200,
):
    """Render ``text`` as a rectangular word cloud and return it as an image.

    Every option is sanitised before use: a missing or out-of-range value is
    replaced by the default from the signature, and numeric values are
    truncated to ``int`` because they may arrive as floats from a UI number
    field while the canvas size must be integral.

    Args:
        text: Text whose words are drawn.
        background_color: Canvas colour; blank or missing falls back to "white".
        margin: Gap around each word; missing or negative falls back to 2.
        min_font_size: Smallest font size; missing or below 1 falls back to 4.
        max_font_size: Largest font size; missing or below 4 falls back to 200.
        font_path: Font file to draw with; blank or missing falls back to
            ``DEFAULT_FONT_PATH``.
        width: Canvas width; missing or below 1 falls back to 400.
        height: Canvas height; missing or below 1 falls back to 200.

    Returns:
        The value of ``WordCloud.to_image()`` for the generated cloud.
    """
    if not background_color or not str(background_color).strip():
        background_color = "white"
    # 0 is a legitimate margin, so test for None explicitly instead of falsiness.
    if margin is None or margin < 0:
        margin = 2
    if not min_font_size or min_font_size < 1:
        min_font_size = 4
    if not max_font_size or max_font_size < 4:
        max_font_size = 200
    if not font_path or not str(font_path).strip():
        font_path = DEFAULT_FONT_PATH
    if not width or width < 1:
        width = 400
    if not height or height < 1:
        height = 200

    wordcloud = WordCloud(
        font_path=font_path,
        width=int(width),
        height=int(height),
        background_color=background_color,
        max_words=2000,
        margin=int(margin),
        min_font_size=int(min_font_size),
        max_font_size=int(max_font_size),
        random_state=42,  # fixed seed: the same input always gives the same layout
    ).generate(text)
    return wordcloud.to_image()
def text2wordcount_mask(
    text: str,
    background_color: str = "white",
    margin=2,
    min_font_size=4,
    max_font_size=200,
    font_path=None,
    mask_image=None,
    mask_color=None,
    contour_width=3,
    contour_color="steelblue",
):
    """Render ``text`` as a word cloud shaped by ``mask_image``.

    Words are coloured from ``mask_color`` when it is given, otherwise from
    ``mask_image`` itself. Missing or out-of-range options fall back to the
    defaults from the signature.

    Args:
        text: Text whose words are drawn.
        background_color: Canvas colour; blank or missing falls back to "white".
        margin: Gap around each word; missing or negative falls back to 2.
        min_font_size: Smallest font size; missing or below 1 falls back to 4.
        max_font_size: Largest font size; missing or below 4 falls back to 200.
        font_path: Font file to draw with; blank or missing falls back to
            ``DEFAULT_FONT_PATH``.
        mask_image: Image passed to ``WordCloud`` as ``mask``; required.
            Presumably a numpy image array as produced by ``gr.Image`` --
            confirm against the caller.
        mask_color: Optional image used only as the colour source.
        contour_width: Outline width; missing or negative falls back to 3.
            0 is passed through unchanged (per the WordCloud documentation a
            contour is drawn only for widths above 0).
        contour_color: Outline colour; blank or missing falls back to
            "steelblue".

    Returns:
        The value of ``WordCloud.to_image()`` for the generated cloud.

    Raises:
        ValueError: If ``mask_image`` is None.
    """
    if mask_image is None:
        # Fail early with a clear message instead of an AttributeError raised
        # from inside ImageColorGenerator.
        raise ValueError("mask_image is required for the mask word cloud")

    if not background_color or not str(background_color).strip():
        background_color = "white"
    # 0 is a legitimate margin, so test for None explicitly instead of falsiness.
    if margin is None or margin < 0:
        margin = 2
    if not min_font_size or min_font_size < 1:
        min_font_size = 4
    if not max_font_size or max_font_size < 4:
        max_font_size = 200
    if not font_path or not str(font_path).strip():
        font_path = DEFAULT_FONT_PATH
    # Same reasoning as margin: a falsiness test would turn an explicit 0
    # ("no contour") into 3.
    if contour_width is None or contour_width < 0:
        contour_width = 3
    if not contour_color or not str(contour_color).strip():
        contour_color = "steelblue"

    # The second argument of ImageColorGenerator is ``default_color``: an
    # (r, g, b) tuple used for words that fall outside the colour image.
    # Passing True (as before) is not a colour and fails with a TypeError as
    # soon as the fallback is needed, so use black instead.
    fallback_rgb = (0, 0, 0)
    color_source = mask_color if mask_color is not None else mask_image
    image_colors = ImageColorGenerator(color_source, fallback_rgb)

    wordcloud = WordCloud(
        font_path=font_path,
        mask=mask_image,
        background_color=background_color,
        color_func=image_colors,
        contour_width=contour_width,
        contour_color=contour_color,
        max_words=2000,
        margin=int(margin),
        min_font_size=int(min_font_size),
        max_font_size=int(max_font_size),
        random_state=42,  # fixed seed: the same input always gives the same layout
    ).generate(text)
    return wordcloud.to_image()
import jieba
from consts import *
def jieba_processing_txt(text, userdict_list=('阿Q', '孔乙己', '单四嫂子')):
    """Segment ``text`` with jieba and drop stop words and very short tokens.

    Args:
        text: Raw text to segment.
        userdict_list: Extra words registered with ``jieba.add_word`` before
            segmenting. Pass None or an empty sequence to register nothing.
            Blank entries are skipped.

    Returns:
        The kept tokens joined by single spaces. A token is kept when, after
        stripping surrounding whitespace, it is longer than one character and
        is not a line of the stop-word file at ``STOPWORDS_PATH``.
    """
    if userdict_list is not None:
        for word in userdict_list:
            # An empty dictionary entry carries no information; skip it.
            if word and word.strip():
                jieba.add_word(word)

    with open(STOPWORDS_PATH, encoding='utf-8') as f_stop:
        # A set gives constant-time membership tests for every token below.
        stopwords = set(f_stop.read().splitlines())

    words = (token.strip() for token in jieba.cut(text, cut_all=False))
    return ' '.join(w for w in words if len(w) > 1 and w not in stopwords)
import gradio as gr
import lib_word_cloud
import jieba_util
from consts import *
def service_text2wc(
    text_file,
    text_lang,
    text_dict: str,
    background_color,
    margin,
    max_font_size,
    min_font_size,
    font_file,
    width,
    height,
    mask_image,
    mask_color,
    contour_width,
    contour_color,
):
    """Gradio click handler: turn the uploaded text file into a word cloud image.

    Validates the inputs, reads the file as UTF-8, optionally segments it with
    jieba (when ``text_lang`` is '中文'), then renders either the mask variant
    (when ``mask_image`` is given) or the plain rectangular variant.

    Note that ``max_font_size`` precedes ``min_font_size`` in this signature;
    the ``inputs`` list of the click event must use the same order.

    Returns:
        The rendered image, or None after a ``gr.Warning`` when validation
        fails.

    Raises:
        gr.Error: If reading, segmenting or rendering fails.
    """
    if not text_file:
        gr.Warning("请传入正确的文本文件!")
        return None

    # A cleared number field arrives as None; comparing None with an int would
    # raise TypeError before any warning could be shown.
    if margin is None:
        margin = 2
    if margin < 0:
        gr.Warning("字体间隔配置不合法!")
        return None

    given_sizes = [s for s in (min_font_size, max_font_size) if s is not None]
    sizes_inverted = len(given_sizes) == 2 and min_font_size > max_font_size
    if any(s < 0 for s in given_sizes) or sizes_inverted:
        gr.Warning("字体大小配置不合法!")
        return None

    try:
        with open(file=text_file.name, encoding="utf-8") as file:
            text = file.read()

        if text_lang == '中文':
            gr.Info("选择了中文,将使用Jieba库解析文本!")
            userdict_list = []
            if text_dict is not None:
                # Drop blank entries so an empty textbox yields [] rather than [""].
                stripped = (w.strip() for w in text_dict.split(","))
                userdict_list = [w for w in stripped if w]
            text = jieba_util.jieba_processing_txt(text, userdict_list)

        font_path = font_file.name if font_file else None

        if mask_image is not None:
            return lib_word_cloud.text2wordcount_mask(
                text,
                background_color,
                margin,
                min_font_size,
                max_font_size,
                font_path,
                mask_image,
                mask_color,
                contour_width,
                contour_color,
            )
        return lib_word_cloud.text2wordcount_normal(
            text,
            background_color,
            margin,
            min_font_size,
            max_font_size,
            font_path,
            width,
            height,
        )
    except Exception as e:
        print(e)
        # Chain the cause so the original traceback stays visible in the logs.
        raise gr.Error("文本转词云图时,发生异常:" + str(e)) from e
# JavaScript source handed to gr.Blocks(js=...) below. It defines
# createGradioAnimation(), which builds a centred, bold banner <div> and
# appends the welcome text to it one character every 200 ms, fading each
# character in through an opacity transition. The banner is inserted as the
# first child of the '.gradio-container' element.
# NOTE(review): presumably Gradio runs this function once when the page
# loads -- confirm against the gr.Blocks `js` parameter documentation.
js = """
function createGradioAnimation() {
var container = document.createElement('div');
container.id = 'gradio-animation';
container.style.fontSize = '2em';
container.style.fontWeight = 'bold';
container.style.textAlign = 'center';
container.style.marginBottom = '20px';
var text = '欢迎使用“词云转换器”!';
for (var i = 0; i < text.length; i++) {
(function(i){
setTimeout(function(){
var letter = document.createElement('span');
letter.style.opacity = '0';
letter.style.transition = 'opacity 0.5s';
letter.innerText = text[i];
container.appendChild(letter);
setTimeout(function() {
letter.style.opacity = '1';
}, 50);
}, i * 200);
})(i);
}
var gradioContainer = document.querySelector('.gradio-container');
gradioContainer.insertBefore(container, gradioContainer.firstChild);
return 'Animation created';
}
"""
# Gradio UI definition. The left column holds the text input and the two
# rendering modes (plain / mask); the right column holds the shared styling
# options, the submit button and the output image.
#
# NOTE(review): the indentation of this block was lost in the source this was
# recovered from, so the nesting of Row/Column/Group/Tab below is a
# reconstruction. Components and event wiring are unaffected by the nesting;
# only the visual layout may differ from the original -- confirm against the
# upstream project.
#
# Number inputs that represent integral quantities (pixels, font sizes,
# margin) use precision=0 so the handler receives ints rather than floats.
with gr.Blocks(title="词云转换器", js=js) as iface:
    with gr.Row():
        with gr.Column():
            with gr.Group():
                with gr.Row():
                    input_text_file = gr.File(label="待处理的文本文件(必填)")
                    with gr.Column():
                        gr.Label(label="Tips", value="请传入正常可读的文本文件,如以.txt结尾的文档", color="#fee2e2")
                        gr.File(value=EXAMPLE_TEXT_FILE, label="文本文件的样例")
                input_text_lang = gr.Radio(label="文本语言模式", choices=["中文", "英文"], value="中文")
                input_text_dict = gr.Textbox(
                    label="自定义分词词典(可选)",
                    info="中文模式使用,多个词之间用英文逗号分隔,例如'阿Q, 孔乙己, 单四嫂子'",
                )
            with gr.Tab("普通模式"):
                with gr.Row():
                    input_width = gr.Number(value=400, label="生成图像的宽", minimum=1, precision=0)
                    input_height = gr.Number(value=200, label="生成图像的高", minimum=1, precision=0)
                gr.Label(label="Tips", value="使用该模式时,记得清理掉“Mask模式”下的“Mask图像”", color="#fee2e2")
            with gr.Tab("Mask模式"):
                with gr.Row():
                    input_contour_width = gr.Number(value=3, label="轮廓线的粗细", minimum=0)
                    input_contour_color = gr.Textbox(value="steelblue", label="轮廓线的颜色")
                with gr.Row():
                    input_mask_image = gr.Image(label="Mask图像(决定词云的形状、颜色、宽高)")
                    input_mask_color = gr.Image(label="若传入该图,则词云的颜色由该图决定")
                gr.Gallery(
                    value=[EXAMPLE_MASK_IMAGE_PATH, EXAMPLE_MASK_IMAGE_PATH, EXAMPLE_MASK_IMAGE_PATH],
                    label="Mask图像的样例",
                    interactive=False,
                )
        with gr.Column():
            with gr.Group():
                with gr.Row():
                    with gr.Group():
                        input_bg_color = gr.Textbox(value="white", label="词云图的背景色(默认为'white')")
                        input_margin = gr.Number(value=2, label="字体间隔(默认为'2')", minimum=0, precision=0)
                with gr.Row():
                    input_min_font_size = gr.Number(value=4, label="字体大小-最小值", minimum=1, precision=0)
                    input_max_font_size = gr.Number(value=200, label="字体大小-最大值", minimum=4, precision=0)
                input_font_file = gr.File(label="词云图的字体文件(可选,如otf文件)")
            format_radio = gr.Radio(
                choices=["png", "jpeg", "webp", "bmp", "tiff"],
                label="词云图像格式",
                value="png",
            )
            submit_button = gr.Button("开始处理", variant="primary")
            output_image = gr.Image(label="词云图", format="png")

    def fix_format(x):
        # Switches the image format by mutating the output component in place.
        # NOTE(review): the component object is created once at module level,
        # so this change is presumably visible to every session -- confirm
        # whether a per-session update is wanted instead.
        output_image.format = x
        return None

    format_radio.change(fn=fix_format, inputs=format_radio)

    # The order of `inputs` must match the parameter order of service_text2wc
    # (max_font_size comes before min_font_size there).
    submit_button.click(
        fn=service_text2wc,
        inputs=[
            input_text_file,
            input_text_lang,
            input_text_dict,
            input_bg_color,
            input_margin,
            input_max_font_size,
            input_min_font_size,
            input_font_file,
            input_width,
            input_height,
            input_mask_image,
            input_mask_color,
            input_contour_width,
            input_contour_color,
        ],
        outputs=output_image,
    )
- consts.py:下面各文件的路径需要与 resources 目录的实际位置对应,如不一致请自行修改
# Imported under a private alias so `from consts import *` does not re-export it.
from pathlib import Path as _Path

# Resource files live in the "resources" directory next to this file. Resolving
# them relative to the file (instead of the current working directory, with
# Windows-only backslashes) makes the paths valid on any OS and from any
# launch directory.
_RESOURCES_DIR = _Path(__file__).resolve().parent / "resources"

# Sample text shown in the UI as an example input file.
EXAMPLE_TEXT_FILE = str(_RESOURCES_DIR / "CalltoArms.txt")
# Sample mask image shown in the UI gallery (the previous literal carried a
# trailing space inside the quotes, which pointed at a non-existent file).
EXAMPLE_MASK_IMAGE_PATH = str(_RESOURCES_DIR / "parrot_mask.png")
# Stop-word list used when filtering jieba tokens, one entry per line.
STOPWORDS_PATH = str(_RESOURCES_DIR / "stopwords_cn_en.txt")
# Font used when the user does not upload one.
DEFAULT_FONT_PATH = str(_RESOURCES_DIR / "SourceHanSerifK-Light.otf")
- resources 目录
- parrot_mask.png
- CalltoArms.txt https://github.com/amueller/word_cloud/blob/main/examples/wc_cn/CalltoArms.txt
- SourceHanSerifK-Light.otf https://github.com/amueller/word_cloud/blob/main/examples/fonts/SourceHanSerif/SourceHanSerifK-Light.otf
- stopwords_cn_en.txt https://github.com/amueller/word_cloud/blob/main/examples/wc_cn/stopwords_cn_en.txt