效果图如下:
第一步获取热搜
/**
 * Returns the merged hot-search titles as a list.
 *
 * @param size number of titles requested; must be non-null and positive
 * @return the titles produced by {@code getHotNews(size)} copied into a new list, or
 *         {@code null} when {@code size} is null or not positive, or when fetching fails
 */
public List<String> getHotNews4(Integer size) {
    // The null test must come first: unboxing a null Integer in "size <= 0" throws.
    if (size == null || size <= 0) {
        return null;
    }
    try {
        Set<String> titles = getHotNews(size);
        if (titles == null) {
            // getHotNews signals failure with null and has already logged the cause.
            return null;
        }
        // Convert the set to a list.
        return new ArrayList<>(titles);
    } catch (Exception e) {
        logger.error("抓取热点排行榜异常:", e);
        return null;
    }
}
/**
 * Merges hot-search titles from Baidu, Weibo and Douyin into one de-duplicated set.
 *
 * <p>The requested size is split 40% Baidu / 30% Weibo / remainder Douyin. When a source
 * has fewer entries than its quota, the shortfall is passed on to the next source. If the
 * set is still smaller than {@code size} afterwards (short sources or duplicate titles),
 * it is topped up with the remaining Baidu entries.
 *
 * @param size number of titles wanted
 * @return the collected titles (possibly fewer than {@code size}), or {@code null} if an
 *         unexpected error occurs while fetching or merging
 */
public Set<String> getHotNews(int size) {
    try {
        List<NewsModel> list1 = grabBaiduHotNews();
        List<NewsModel> list2 = grabWeiBoHotNews();
        List<NewsModel> list3 = getDouYinHotNews();
        // A source that reports failure with null is treated as having no entries.
        if (list1 == null) {
            list1 = new ArrayList<>();
        }
        if (list2 == null) {
            list2 = new ArrayList<>();
        }
        if (list3 == null) {
            list3 = new ArrayList<>();
        }

        // Nominal weights: 40% Baidu, 30% Weibo, the rest Douyin.
        int nominalBaidu = size * 40 / 100;
        int nominalWeibo = size * 30 / 100;
        int nominalDouyin = size - nominalBaidu - nominalWeibo;

        // Each quota is capped by what the source actually has; whatever an earlier
        // source could not supply is handed to the next one, so the quotas never sum
        // to more than size.
        int l1 = Math.min(list1.size(), nominalBaidu);
        int l2 = Math.min(list2.size(), size - l1 - nominalDouyin);
        int l3 = Math.min(list3.size(), size - l1 - l2);

        Set<String> set = new HashSet<>();
        for (int i = 0; i < l1; i++) {
            set.add(list1.get(i).getTitle());
        }
        for (int i = 0; i < l2; i++) {
            set.add(list2.get(i).getTitle());
        }
        for (int i = 0; i < l3; i++) {
            set.add(list3.get(i).getTitle());
        }

        // Top up from the Baidu entries not consumed yet, starting exactly at index l1
        // and stopping at the end of the list or once enough titles are collected.
        for (int i = l1; i < list1.size() && set.size() < size; i++) {
            set.add(list1.get(i).getTitle());
        }
        return set;
    } catch (Exception e) {
        logger.error("抓取热点排行榜异常:", e);
        return null;
    }
}
/**
 * Scrapes the Baidu real-time hot-search board.
 *
 * <p>One entry is produced per hot-index element that also has a matching title element.
 * Image, description and link are filled in only when the page provides an element at the
 * same position, so a layout change in one of those selectors does not abort the scrape.
 *
 * @return the scraped entries; an empty list when the page cannot be fetched
 */
public List<NewsModel> grabBaiduHotNews() {
    String url = "https://top.baidu.com/board?tab=realtime&sa=fyb_realtime_31065";
    List<NewsModel> list = new ArrayList<>();
    try {
        Document doc = Jsoup.connect(url).get();
        // Titles
        Elements titles = doc.select(".c-single-text-ellipsis");
        // Images
        Elements imgs = doc.select(".category-wrap_iQLoo .index_1Ew5p").next("img");
        // Descriptions
        Elements contents = doc.select(".hot-desc_1m_jR.large_nSuFU");
        // Links
        Elements urls = doc.select(".category-wrap_iQLoo a.img-wrapper_29V76");
        // Hot-index elements; only their count is used, the value itself is not stored.
        Elements levels = doc.select(".hot-index_1Bl1a");

        // The selectors are independent, so their result sizes may differ; never index
        // past the end of any of them.
        int count = Math.min(levels.size(), titles.size());
        for (int i = 0; i < count; i++) {
            NewsModel o = new NewsModel();
            o.setTitle(titles.get(i).text().trim());
            if (i < imgs.size()) {
                o.setImg(imgs.get(i).attr("src"));
            }
            if (i < contents.size()) {
                // Strip the trailing "view more" link text from the description.
                o.setContent(contents.get(i).text().replace("查看更多>", "").trim());
            }
            if (i < urls.size()) {
                o.setUrl(urls.get(i).attr("href"));
            }
            list.add(o);
        }
        return list;
    } catch (IOException e) {
        // Pass the exception itself so the stack trace is kept in the log.
        logger.error("抓取百度热点排行榜异常:", e);
    }
    // Return an empty list rather than null so callers can use the result directly.
    return new ArrayList<>();
}
/**
 * Fetches the Weibo hot-search list from its JSON endpoint.
 *
 * <p>Each band-list item is mapped to a {@code NewsModel}: note as title, mblog as image,
 * word as content and word_scheme as url.
 *
 * @return the mapped entries; an empty list when the request fails or the response does
 *         not contain a band list
 */
public List<NewsModel> grabWeiBoHotNews() {
    String url = "https://weibo.com/ajax/statuses/hot_band";
    try {
        String s = HttpUtil.get(url);
        WeiBoHot weiBoHot = JSON.parseObject(s, WeiBoHot.class);
        // An empty or unexpected response leaves the parsed object or its fields null.
        if (weiBoHot == null || weiBoHot.getData() == null
                || weiBoHot.getData().getBand_list() == null) {
            logger.error("抓取微博热搜榜异常:响应中没有热搜列表");
            return new ArrayList<>();
        }
        List<NewsModel> list = new ArrayList<>();
        for (WeiBoHot.WeiBo weiBo : weiBoHot.getData().getBand_list()) {
            NewsModel o = new NewsModel();
            o.setTitle(weiBo.getNote());
            o.setImg(weiBo.getMblog());
            o.setContent(weiBo.getWord());
            o.setUrl(weiBo.getWord_scheme());
            list.add(o);
        }
        return list;
    } catch (Exception e) {
        // Same policy as the other sources: log and degrade to "no entries" so that one
        // failing source does not break the merged result.
        logger.error("抓取微博热搜榜异常:", e);
    }
    return new ArrayList<>();
}
/**
 * Fetches the Douyin hot-search billboard and maps every entry of its {@code word_list}
 * array to a {@code NewsModel} built from the entry's {@code word} field.
 *
 * @return the mapped entries; an empty list (never {@code null}) when the request fails
 *         or the response has no {@code word_list}
 */
public List<NewsModel> getDouYinHotNews() {
    try {
        String s = HttpUtils.get("https://www.iesdouyin.com/web/api/" +
                "v2/hotsearch/billboard/word/?reflow_source=reflow_page");
        JSONObject body = JSON.parseObject(s);
        JSONArray jsonArray = body == null ? null : body.getJSONArray("word_list");
        if (jsonArray == null) {
            // Check explicitly instead of relying on a NullPointerException below.
            logger.error("抓取抖音热搜榜异常:响应中没有 word_list");
            return new ArrayList<>();
        }
        List<NewsModel> list = new ArrayList<>();
        for (int i = 0; i < jsonArray.size(); i++) {
            JSONObject jsonObject = jsonArray.getJSONObject(i);
            list.add(new NewsModel(jsonObject.getString("word")));
        }
        return list;
    } catch (Exception e) {
        logger.error("抓取抖音热搜榜异常:", e);
    }
    // Empty list instead of null so callers do not hit a NullPointerException.
    return new ArrayList<>();
}
第二步生成图片
/**
 * Renders a list of words as a word-cloud image and exposes it either as a Base64 data
 * URI or as a PNG written to an HTTP response.
 */
public class WordCodeUtil {
    private static final Logger logger = LoggerFactory.getLogger(WordCodeUtil.class);

    /**
     * Builds a 940x400 word cloud from the given words.
     *
     * <p>The last element of {@code words} is left out, as in the original implementation.
     * The remaining words are tokenized for frequency analysis and additionally added as
     * whole entries, weighted by position (earlier words weigh more).
     *
     * @param words the words to render; the list itself is not modified
     * @return the built word cloud
     * @throws NullPointerException if {@code words} is {@code null}
     * @throws RuntimeException if the bundled font file has an invalid format
     */
    public static WordCloud getWordCode(List<String> words) {
        if (words == null) {
            throw new NullPointerException("words must not be null");
        }
        // NOTE(review): the reason for dropping the last element is not visible here --
        // confirm with the caller. A view is used instead of remove() so the caller's
        // list is left untouched and empty or unmodifiable lists do not throw.
        final List<String> kept = words.subList(0, Math.max(0, words.size() - 1));

        FrequencyAnalyzer frequencyAnalyzer = new FrequencyAnalyzer();
        frequencyAnalyzer.setWordFrequenciesToReturn(600);
        frequencyAnalyzer.setMinWordLength(3);
        // Use the Chinese tokenizer.
        frequencyAnalyzer.setWordTokenizer(new ChineseWordTokenizer());
        final List<WordFrequency> wordFrequencyList = frequencyAnalyzer.load(kept);

        // Also add every input as a whole phrase, weighted by its rank in the list.
        int weight = kept.size();
        for (String word : kept) {
            wordFrequencyList.add(new WordFrequency(word, weight--));
        }

        // Image resolution.
        Dimension dimension = new Dimension(940, 400);
        WordCloud wordCloud = new WordCloud(dimension, CollisionMode.PIXEL_PERFECT);

        // Load the bundled font; on I/O failure the cloud keeps its default font.
        try (BufferedInputStream bis = getFileFromResource("LXGWWenKaiMono-Bold.ttf")) {
            Font font = Font.createFont(Font.TRUETYPE_FONT, bis);
            wordCloud.setKumoFont(new KumoFont(font));
        } catch (IOException e) {
            logger.error("加载字体文件失败!", e);
        } catch (FontFormatException e) {
            throw new RuntimeException(e);
        }

        // Styling.
        wordCloud.setPadding(3);
        wordCloud.setColorPalette(new ColorPalette(
                new Color(0xed1941), new Color(0xf26522), new Color(0x845538),
                new Color(0x8a5d19), new Color(0x7f7522), new Color(0x5c7a29),
                new Color(0x1d953f), new Color(0x007d65), new Color(0x65c294)
        ));
        wordCloud.setBackground(new RectangleBackground(dimension));
        wordCloud.setFontScalar(new SqrtFontScalar(12, 45));
        wordCloud.setBackgroundColor(new Color(255, 255, 255));

        // Lay out and draw the words.
        wordCloud.build(wordFrequencyList);
        return wordCloud;
    }

    /**
     * Renders the word cloud and returns it as a Base64 PNG data URI for the browser.
     *
     * @param words the words to render
     * @return a {@code data:image/png;base64,...} string
     */
    public static String getWordCloud2(List<String> words) {
        return bufferedImageToBase64(getWordCode(words).getBufferedImage());
    }

    /**
     * Renders the word cloud and writes it as a PNG to the HTTP response.
     *
     * @param words    the words to render
     * @param response the response whose output stream receives the image
     * @throws IOException if obtaining the output stream or flushing the response fails
     */
    public static void getWordCloud3(List<String> words,
                                     HttpServletResponse response) throws IOException {
        WordCloud wordCloud = getWordCode(words);
        // The body is always a PNG, so declare it before any bytes are written.
        response.setContentType("image/png");
        wordCloud.writeToStreamAsPNG(response.getOutputStream());
        response.flushBuffer();
    }

    /**
     * Opens a resource through Hutool's {@code ResourceUtil}.
     * Original author's note: this approach also works when running from a jar.
     *
     * @param fileName resource name to look up
     * @return a buffered stream over the first matching resource; the caller must close it
     * @throws IOException if no such resource exists or it cannot be opened
     */
    public static BufferedInputStream getFileFromResource(String fileName) throws IOException {
        List<URL> resources = ResourceUtil.getResources(fileName);
        if (resources == null || resources.isEmpty()) {
            // Report a missing resource as an IOException so callers' existing I/O
            // handling applies, instead of failing with an index error.
            throw new IOException("Resource not found: " + fileName);
        }
        return new BufferedInputStream(resources.get(0).openStream());
    }

    /**
     * Encodes an image as a Base64 PNG data URI.
     *
     * @param bufferedImage the image to encode
     * @return {@code "data:image/png;base64,"} followed by the encoded PNG bytes; if
     *         writing the PNG fails, the error is logged and whatever was written so far
     *         is encoded
     */
    private static String bufferedImageToBase64(BufferedImage bufferedImage) {
        ByteArrayOutputStream bao = new ByteArrayOutputStream();
        try {
            ImageIO.write(bufferedImage, "png", bao);
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
        // The basic encoder never inserts line separators, so no stripping is needed.
        return "data:image/png;base64," + Base64.getEncoder().encodeToString(bao.toByteArray());
    }
}
完整代码可访问 https://github.com/KingJin-web/open_api 获取。