公司有个项目需要五级行政区划数据,没有现成的数据源,于是写了一段代码,从国家统计局网站抓取。在此记录一下。
1.引入 Maven 依赖 jsoup,用于解析 HTML
<!-- jsoup --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.3</version> </dependency>
2.Java代码实现
/**
 * Crawls the 2023 division-code pages on stats.gov.cn, walking
 * province -> city -> county -> town -> village, and stores one record per
 * village row through {@code divisionService.insertDivision1}.
 *
 * <p>On the city, county and town pages the selected links are read as
 * consecutive (code, name) pairs; the name link's target is followed to the
 * next level. The crawl pauses two seconds after each town. An
 * {@link IOException} from any page ends the crawl and is printed.
 *
 * <p>NOTE(review): a city page without a {@code table.countytable} yields no
 * rows here - confirm whether such pages exist and need separate handling.
 *
 * @throws RuntimeException if the thread is interrupted during the pause
 */
@GetMapping("/hh")
public void hh() {
    // A single Division is reused for every insert: each level overwrites its
    // own fields before descending, so all fields are current at insert time.
    Division d = new Division();
    String provinceurl = "https://www.stats.gov.cn/sj/tjbz/tjyqhdmhcxhfdm/2023/index.html"; // entry page of the crawl
    try {
        Document document = Jsoup.connect(provinceurl).get();

        // Province level: one link per province, href is "<code>.html".
        Elements provinceLinks = document.body().select("tr.provincetr").select("a[href]");
        for (Element province : provinceLinks) {
            d.setProvincialCode(province.attr("href").replace(".html", ""));
            d.setProvincialName(province.text());
            // absUrl resolves the relative href against the page it came from.
            Document cityDocument = Jsoup.connect(province.absUrl("href")).get();

            // City level.
            Elements cityLinks = cityDocument.body().select("table.citytable").select("a[href]");
            // Step by two over (code, name) pairs. The previous bound of size/2
            // combined with a step of two visited only about half of the pairs.
            for (int i = 0; i + 1 < cityLinks.size(); i += 2) {
                Element cityNameLink = cityLinks.get(i + 1);
                d.setMunicipalCode(cityLinks.get(i).text());
                d.setMunicipalName(cityNameLink.text());
                String countyurl = cityNameLink.absUrl("href");
                System.out.println("countyurl = " + countyurl);
                Document countyDocument = Jsoup.connect(countyurl).get();

                // County / district level.
                Elements countyLinks = countyDocument.body().select("table.countytable").select("a[href]");
                for (int j = 0; j + 1 < countyLinks.size(); j += 2) {
                    Element countyNameLink = countyLinks.get(j + 1);
                    d.setDistrictCode(countyLinks.get(j).text());
                    d.setDistrictName(countyNameLink.text());
                    Document townDocument = Jsoup.connect(countyNameLink.absUrl("href")).get();

                    // Town / street level.
                    Elements townLinks = townDocument.body().select("table.towntable").select("a[href]");
                    for (int k = 0; k + 1 < townLinks.size(); k += 2) {
                        Element townNameLink = townLinks.get(k + 1);
                        d.setStreetTownCode(townLinks.get(k).text());
                        d.setStreetTownName(townNameLink.text());
                        String villageurl = townNameLink.absUrl("href");
                        System.out.println("villageurl = " + villageurl);
                        Document villageDocument = Jsoup.connect(villageurl).get();

                        // Village level: rows have no links, only three cells
                        // (division code, urban-rural code, name).
                        Elements villageRows = villageDocument.body().select("table.villagetable").select("tr.villagetr");
                        for (Element village : villageRows) {
                            // Read the cells directly instead of splitting the row
                            // text on spaces, which breaks on names containing a space.
                            Elements cells = village.select("td");
                            if (cells.size() < 3) {
                                continue; // malformed row, nothing to store
                            }
                            String villageCode = cells.get(0).text();
                            String urbanRural = cells.get(1).text();
                            String villageName = cells.get(2).text();
                            System.out.println("统计用区划代码: " + villageCode);
                            System.out.println("城乡分类代码: " + urbanRural);
                            System.out.println("名称: " + villageName);
                            d.setCommunityVillageCode(villageCode);
                            d.setUrbanRural(urbanRural);
                            d.setCommunityVillageName(villageName);
                            System.out.println("d.toString() = " + d.toString());
                            divisionService.insertDivision1(d);
                        }

                        // Deliberate throttle between towns; adjust as needed.
                        try {
                            Thread.sleep(2000);
                        } catch (InterruptedException e) {
                            // Restore the interrupt flag before propagating.
                            Thread.currentThread().interrupt();
                            throw new RuntimeException(e);
                        }
                    }
                }
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.用到的实体类
import com.ruoyi.common.utils.StringUtils;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import com.ruoyi.common.annotation.Excel;
import com.ruoyi.common.core.domain.BaseEntity;
/**
 * Administrative division entity ({@code division}).
 *
 * <p>Holds the code and name of each of the five levels (province, city,
 * district/county, street/town, community/village) plus the urban-rural
 * classification.
 *
 * @author liphui
 * @date 2023-11-17
 */
public class Division extends BaseEntity
{
    private static final long serialVersionUID = 1L;

    /** Province-level code. */
    @Excel(name = "省级代码")
    private String provincialCode;

    /** Province-level name. */
    @Excel(name = "省级名称")
    private String provincialName;

    /** City-level code. */
    @Excel(name = "市级代码")
    private String municipalCode;

    /** City-level name. */
    @Excel(name = "市级名称")
    private String municipalName;

    /** District/county code. */
    @Excel(name = "区县代码")
    private String districtCode;

    /** District/county name. */
    @Excel(name = "区县名称")
    private String districtName;

    /** Street/town/township code. */
    @Excel(name = "街镇乡代码")
    private String streetTownCode;

    /** Street/town/township name. */
    @Excel(name = "街镇乡名称")
    private String streetTownName;

    /** Community/village code. */
    @Excel(name = "社区村级代码")
    private String communityVillageCode;

    /** Community/village name. */
    @Excel(name = "社区村级名称")
    private String communityVillageName;

    /** Urban-rural classification. */
    @Excel(name = "城乡分类")
    private String urbanRural;

    public String getProvincialCode() {
        return provincialCode;
    }

    public void setProvincialCode(String provincialCode) {
        this.provincialCode = provincialCode;
    }

    public String getProvincialName() {
        return provincialName;
    }

    public void setProvincialName(String provincialName) {
        this.provincialName = provincialName;
    }

    public String getMunicipalCode() {
        return municipalCode;
    }

    public void setMunicipalCode(String municipalCode) {
        this.municipalCode = municipalCode;
    }

    public String getMunicipalName() {
        return municipalName;
    }

    public void setMunicipalName(String municipalName) {
        this.municipalName = municipalName;
    }

    public String getDistrictCode() {
        return districtCode;
    }

    public void setDistrictCode(String districtCode) {
        this.districtCode = districtCode;
    }

    public String getDistrictName() {
        return districtName;
    }

    public void setDistrictName(String districtName) {
        this.districtName = districtName;
    }

    public String getStreetTownCode() {
        return streetTownCode;
    }

    public void setStreetTownCode(String streetTownCode) {
        this.streetTownCode = streetTownCode;
    }

    public String getStreetTownName() {
        return streetTownName;
    }

    public void setStreetTownName(String streetTownName) {
        this.streetTownName = streetTownName;
    }

    public String getCommunityVillageCode() {
        return communityVillageCode;
    }

    public void setCommunityVillageCode(String communityVillageCode) {
        this.communityVillageCode = communityVillageCode;
    }

    public String getCommunityVillageName() {
        return communityVillageName;
    }

    public void setCommunityVillageName(String communityVillageName) {
        this.communityVillageName = communityVillageName;
    }

    public String getUrbanRural() {
        return urbanRural;
    }

    public void setUrbanRural(String urbanRural) {
        this.urbanRural = urbanRural;
    }

    /**
     * Joins the level names from province downwards with commas, stopping at
     * the first level whose name is empty.
     *
     * @return the comma-separated names, or an empty string when the
     *         province name is empty
     */
    public String getDivisionName() {
        return joinLeadingLevels(
                this.provincialName,
                this.municipalName,
                this.districtName,
                this.streetTownName,
                this.communityVillageName);
    }

    /**
     * Joins the level codes from province downwards with commas, stopping at
     * the first level whose code is empty.
     *
     * @return the comma-separated codes, or an empty string when the
     *         province code is empty
     */
    public String getDivisionCode() {
        return joinLeadingLevels(
                this.provincialCode,
                this.municipalCode,
                this.districtCode,
                this.streetTownCode,
                this.communityVillageCode);
    }

    /**
     * Concatenates the given values in order, comma-separated, up to but not
     * including the first one that {@code StringUtils.isNotEmpty} rejects.
     */
    private static String joinLeadingLevels(String... levels) {
        StringBuilder joined = new StringBuilder();
        for (int idx = 0; idx < levels.length; idx++) {
            String level = levels[idx];
            if (!StringUtils.isNotEmpty(level)) {
                break;
            }
            if (idx > 0) {
                joined.append(",");
            }
            joined.append(level);
        }
        return joined.toString();
    }

    /** Renders every field, one per line, via {@code ToStringBuilder}. */
    @Override
    public String toString() {
        ToStringBuilder builder = new ToStringBuilder(this, ToStringStyle.MULTI_LINE_STYLE);
        builder.append("provincialCode", getProvincialCode());
        builder.append("provincialName", getProvincialName());
        builder.append("municipalCode", getMunicipalCode());
        builder.append("municipalName", getMunicipalName());
        builder.append("districtCode", getDistrictCode());
        builder.append("districtName", getDistrictName());
        builder.append("streetTownCode", getStreetTownCode());
        builder.append("streetTownName", getStreetTownName());
        builder.append("communityVillageCode", getCommunityVillageCode());
        builder.append("communityVillageName", getCommunityVillageName());
        builder.append("urbanRural", getUrbanRural());
        return builder.toString();
    }
}
其他的代码不贴了,就是数据入库。