PHP源码
分享77个PHP源码,总有一款适合您
下面是文件的名字。我放了一些图片,但文章篇幅有限,放不下所有的图,所以只展示了其中一部分。
77个PHP源码下载链接:https://pan.baidu.com/s/12hh-lhIVPL1bZw-d2sfVlQ?pwd=vhqj
提取码:vhqj
Python采集代码下载链接:采集代码.zip - 蓝奏云
import os
import shutil
import time
from time import sleep
import requests
from bs4 import BeautifulSoup
from docx import Document
from docx.shared import Inches
from framework.base.BaseFrame import BaseFrame
from sprider.business.DownLoad import DownLoad
from sprider.business.SeleniumTools import SeleniumTools
from sprider.business.SpriderTools import SpriderTools
from selenium import webdriver
from selenium.webdriver.common.by import By
from sprider.model.SpriderEntity import SpriderEntity
from sprider.access.SpriderAccess import SpriderAccess
class HuaJunCode:
    """Selenium crawler that downloads source-code archives from down.chinaz.com.

    ``sprider()`` is the entry point.  It relies on ``sprider_detail`` and
    ``builder_word``, which are not defined inside this block and must be
    provided on the class elsewhere.
    """

    base_url = "https://down.chinaz.com"  # site being crawled
    save_path = "D:\\Freedom\\Sprider\\ChinaZ\\"
    sprider_count = 146  # number of resources to collect
    sprider_start_count = 6405  # position in the listing where collection starts
    word_content_list = []
    folder_name = ""
    page_end_number = 0
    max_pager = 15  # resources per list page
    haved_sprider_count = 0  # resources collected so far
    page_count = 1  # list page currently being processed
    filter_down_file = []

    # title_name -> (output folder name, column id used in the list-page URL)
    _CATEGORIES = {
        "PHP": ("PHP源码", "572_5"),
        "Go": ("Go源码", "606_572"),
        "NET": ("NET源码", "572_4"),
        "ASP": ("ASP源码", "572_3"),
        "Python": ("Python源码", "604_572"),
        "JavaScript": ("JavaScript源码", "602_572"),
        "Java": ("Java源码", "572_517"),
        "HTML": ("HTML-CSS源码", "608_572"),
        "TypeScript": ("TypeScript源码", "772_572"),
        "微信小程序": ("微信小程序源码", "610_572"),
        "Ruby": ("Ruby源码", "622_572"),
        "NodeJs": ("NodeJs源码", "626_572"),
        "C++": ("C++源码", "596_572"),
        "C": ("C源码", "594_572"),
    }

    def __init__(self):
        # Give every instance its own lists; the class-level defaults above
        # would otherwise be shared (and mutated) by all instances.
        self.word_content_list = []
        self.filter_down_file = []

    def sprider(self, title_name="NET"):
        """Crawl one category and download its resources page by page.

        List pages live at ``<base_url>/class/<column>_<page>.htm``, e.g.
        PHP     https://down.chinaz.com/class/572_5_1.htm
        NET     https://down.chinaz.com/class/572_4_1.htm
        ASP     https://down.chinaz.com/class/572_3_1.htm
        Python  https://down.chinaz.com/class/604_572_1.htm
        HTML    https://down.chinaz.com/class/608_572_1.htm
        WeChat  https://down.chinaz.com/class/610_572_1.htm
        Ruby    https://down.chinaz.com/class/622_572_1.htm
        NodeJs  https://down.chinaz.com/class/626_572_1.htm
        C       https://down.chinaz.com/class/594_572_1.htm

        :param title_name: category key; must be one of ``_CATEGORIES``.
        :raises ValueError: if ``title_name`` is not a known category.
        :return: None
        """
        try:
            self.folder_name, self.second_column_name = self._CATEGORIES[title_name]
        except KeyError:
            # Fail early instead of hitting an AttributeError on
            # second_column_name further down.
            raise ValueError("unsupported title_name: " + repr(title_name)) from None

        first_column_name = title_name  # first-level directory
        self.sprider_category = title_name
        second_folder_name = str(self.sprider_count) + "个" + self.folder_name  # second-level directory
        self.sprider_type = second_folder_name
        # List page that contains the first resource to collect, so earlier
        # pages never have to be visited.
        self.merchant = int(self.sprider_start_count) // int(self.max_pager) + 1
        self.file_path = self.save_path + os.sep + "Code" + os.sep + first_column_name + os.sep + second_folder_name
        # NOTE(review): save_path is overwritten with the final destination
        # (sprider_detail reads it), so a second sprider() call on the same
        # instance nests the path once more.
        self.save_path = self.save_path + os.sep + "Code" + os.sep + first_column_name + os.sep + second_folder_name + os.sep + self.folder_name
        BaseFrame().debug("开始采集ChinaZCode" + self.folder_name + "...")
        sprider_url = self.base_url + "/class/{0}_1.htm".format(self.second_column_name)
        down_path = "D:\\Freedom\\Sprider\\ChinaZ\\Code\\" + first_column_name + "\\" + second_folder_name + "\\Temp\\"

        # Start from empty download and destination folders.
        for folder in (down_path, self.save_path):
            if os.path.exists(folder):
                shutil.rmtree(folder)
            os.makedirs(folder, exist_ok=True)

        chrome_options = webdriver.ChromeOptions()
        diy_prefs = {
            'profile.default_content_settings.popups': 0,
            'download.default_directory': '{0}'.format(down_path),
        }
        # Point Chrome's download directory at down_path.
        chrome_options.add_experimental_option('prefs', diy_prefs)
        chrome_options.add_argument('--headless')  # no visible browser window
        driver = webdriver.Chrome(options=chrome_options)
        try:
            driver.set_window_size(1280, 800)
            driver.get(sprider_url)
            div_elem = driver.find_element(By.CLASS_NAME, "main")  # list-page content
            element_list = div_elem.find_elements(By.CLASS_NAME, 'item')
            # The last numbered pager entry carries the total page count.
            laster_pager_ul = driver.find_element(By.CLASS_NAME, "el-pager")
            laster_pager_li = laster_pager_ul.find_elements(By.CLASS_NAME, 'number')
            page_end_number = int(laster_pager_li[-1].text)

            self.page_count = self.merchant
            while self.page_count <= int(page_end_number):  # stop after the last page
                try:
                    if self.page_count == 1:
                        # Page 1 is already loaded; reuse its elements.
                        self.sprider_detail(driver, element_list, self.page_count, page_end_number, down_path)
                    else:
                        if self.haved_sprider_count == self.sprider_count:
                            BaseFrame().debug("采集到达数量采集停止...")
                            BaseFrame().debug("开始写文章...")
                            self.builder_word(self.folder_name, self.word_content_list)
                            BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")
                            break
                        next_url = self.base_url + "/class/{0}_{1}.htm".format(self.second_column_name, self.page_count)
                        driver.get(next_url)
                        div_elem = driver.find_element(By.CLASS_NAME, "main")  # list-page content
                        element_list = div_elem.find_elements(By.CLASS_NAME, 'item')
                        self.sprider_detail(driver, element_list, self.page_count, page_end_number, down_path)
                    self.page_count = self.page_count + 1  # advance to the next page
                except Exception as e:
                    # NOTE(review): page_count is not advanced on failure, so
                    # the same page is retried after a short pause; a page
                    # that always fails is retried indefinitely.
                    print("sprider()执行过程出现错误:" + str(e))
                    sleep(1)
        finally:
            # Always shut the headless browser down, even on errors.
            driver.quit()
37Design Music Box v2.0
51CHA.COM 网站综合信息查询系统 v1.0
7LOG v1.0 正式版
ActualCounter v2.0
AutoBoss v1.0
BBSVisual for Lrc(PHP) Build 0529
Bo-Blog v1.6 Build 1204
Coppermine Photo v1.3.3 多国语言版
CZ 对战系统(CZ Online Match System) v1.3 繁体中文版
Discuz! v2.2F 简体中文版
Discuz! v2.2F 繁体中文版
Discuz! v2.5F 简体中文免费版
Discuz! v2.5F 繁体中文免费版
exBlog v1.2.0 [L] 圣诞版 Build 1222
exBlog v1.3.1
def sprider_detail(self, driver, element_list, page_count, max_page, down_path):
    """Download every resource listed on one list page.

    This is a method of ``HuaJunCode`` (it reads and updates ``self``); it
    appears apart from the class body in this file.  It also calls
    ``self.builder_filter_file`` and ``self.builder_word``, which are not
    visible here.

    :param driver: Selenium driver currently showing the list page.
    :param element_list: list-page item elements to process.
    :param page_count: number of the list page, used for logging and for the
        last-page check.
    :param max_page: total number of list pages.
    :param down_path: directory the browser downloads into.
    :return: None
    """
    # Read title and link of every item before leaving the list page
    # (presumably because the elements go stale after navigation).  They are
    # kept as tuples so a "$" inside a title cannot corrupt the URL.
    entries = []
    for element in element_list:
        link = element.find_element(By.CLASS_NAME, 'name-text')
        next_url = link.get_attribute("href")
        coder_title = link.get_attribute("title")
        entries.append((coder_title, next_url))

    index = 0
    if int(self.page_count) == int(self.merchant):
        # On the starting page, skip the items that precede the start offset.
        self.sprider_start_index = int(self.sprider_start_count) % int(self.max_pager)
        index = self.sprider_start_index

    # Characters that Windows does not accept in file names.
    invalid_name_chars = str.maketrans("", "", '\\/:*?"<>|')

    while index < len(entries):
        if not os.path.exists(down_path):
            os.makedirs(down_path)
        if self.haved_sprider_count == self.sprider_count:
            BaseFrame().debug("采集到达数量采集停止...")
            break
        coder_title, next_url = entries[index]
        time.sleep(1)
        index = index + 1  # advanced before the work so `continue` cannot loop forever
        sprider_info = "正在采集第" + str(page_count) + "页的第" + str(index) + "个资源,共" + str(max_page) + "页资源"
        BaseFrame().debug(sprider_info)
        try:
            # NOTE: a database-backed "already downloaded" skip
            # (SpriderEntity / SpriderAccess) used to sit here, commented out.
            driver.get(next_url)  # open the detail page
            # Presumably 3 means "element not found" -- confirm in SeleniumTools.
            if SeleniumTools.judeg_element_isexist(driver, "CLASS_NAME", "download-item") == 3:
                driver.back()
                BaseFrame().debug(coder_title + "不存在源码是soft因此跳过哦....")
                continue
            print("准备点击下载按钮...")
            driver.find_element(By.CLASS_NAME, "download-item").click()  # start the download
            sleep(1)
            # Checks whether an archive has arrived in down_path; the meaning
            # of the numeric arguments is defined by SpriderTools.
            result, message = SpriderTools.judge_file_exist(True, 240, 1, down_path, self.filter_down_file, "zip|rar|gz|tgz")
            if result is True:
                # Record the successful download.
                sprider_content = [coder_title, self.save_path + os.sep + "image" + os.sep + coder_title + ".jpg"]
                self.word_content_list.append(sprider_content)
                self.haved_sprider_count = self.haved_sprider_count + 1
                BaseFrame().debug("已经采集完成第" + str(self.haved_sprider_count) + "个")
                time.sleep(1)
                driver.back()
                # Remove every character Windows rejects in file names, not
                # only "/", so the rename below cannot fail on the title.
                coder_title = str(coder_title).translate(invalid_name_chars)
                files = os.listdir(down_path)
                # NOTE(review): takes the first entry without consulting
                # filter_down_file -- verify no stale files can be present.
                file_name = files[0]
                srcFile = down_path + os.sep + file_name
                file_ext = os.path.splitext(srcFile)[-1]
                dstFile = down_path + os.sep + coder_title + file_ext
                os.rename(srcFile, dstFile)
                srcFile = dstFile
                dstFile = self.save_path + os.sep + coder_title + file_ext
                shutil.move(srcFile, dstFile)  # move into the destination folder
            else:
                files = os.listdir(down_path)  # everything currently in the download folder
                coder_title = str(coder_title).replace("/", "")
                try:
                    if str(message) == "0个文件认定是False":
                        BaseFrame().error(coder_title + "文件不存在...")
                        shutil.rmtree(down_path)  # fails while a download is still in progress
                    else:
                        BaseFrame().error("检测下载文件出错可能原因是等待时间不够已经超时,再等待60秒...")
                        time.sleep(60)
                        shutil.rmtree(down_path)  # fails while a download is still in progress
                        # The folder is gone, so the filter list can be reset.
                        self.filter_down_file.clear()
                except Exception:
                    # The folder could not be removed: remember the leftover
                    # files so they are filtered out later.
                    self.builder_filter_file(files)
        except Exception as e:
            BaseFrame().error("sprider_detail()执行过程出现错误:" + str(e))
            BaseFrame().error("sprider_detail()记录下载的文件名")
            # Remember the leftover files so they are filtered out later.
            files = os.listdir(down_path)
            self.builder_filter_file(files)

    if int(page_count) == int(max_page):
        # Last list page: write the summary document.
        self.builder_word(self.folder_name, self.word_content_list)
        BaseFrame().debug("文件编写完毕,请到对应的磁盘查看word文件和下载文件!")
gnbook 留言本信息发布系统 v1.5
Google Pagerank Inspector v1.0
GreenTea(绿茶)系统 Rc1
GreyDream BulletinBoard v1.0
Kacd 留言本 v2005
KECOMS v1.0
LimboCMS v1.0.3.6 Beta 汉化版
Login Manager v3.0
LOOKAN bt小偷 v2.2 修正版
Mambo v4.5.2 UTF-8体验版
Mambo v4.5.2 简体中文版
Mambo v4.5.2+SMF v1.0.3 整合版
McGallery Pro v2.0
Mini Site Manager v1.0.1
MusicBox v2.1
n@log analyzer v5.04 多国语言修正版
net2ftp v0.82 多国语言版
WSN Links v3.15
XOOPS 2005 新春特别版
XOOPS v2.0.9 CMSChina中文版
凯文下载系统(kevin download system) v1.2
可慧网站内容管理系统KehuiCMS v7.1
回忆造形留言板SK-Gbook v0.0
天地网络同学录 v1.0
天地网络同学录 v3.0 升级包
孙悟空个人搜索引擎联盟 v1.0
无忧搜网 Google PageRank 及时速查系统 v2.2
板蛋村留言本 v1.4
梦缘日记本 v1.0 测试版
气功猪下载系统 v1.1
菁菁整站 v2.0 简洁版
菁菁整站 v2.0 豪华版
菁菁整站discuz&phpwind v1.61
菁菁整站系统 v1.5
逆向发布(小偷)系统 v.3
醉石音乐 v1.2
金玄网整站程序 vBoard RC1
鸿天全站HUGESKY v4.0
import os
def search_file(dirPath, fileName):
    """Delete every file named *fileName* found anywhere below *dirPath*.

    The path of each match is printed before the file is removed.
    Symbolic links to directories are not followed, so the search cannot
    leave *dirPath* or recurse forever through a cyclic link.

    :param dirPath: directory to search recursively.
    :param fileName: exact file name to look for.
    :return: None
    """
    for currentFile in os.listdir(dirPath):  # files and folders directly in dirPath
        absPath = dirPath + '/' + currentFile
        if os.path.isdir(absPath):
            if not os.path.islink(absPath):
                # A real sub-directory: keep searching inside it.
                search_file(absPath, fileName)
        elif currentFile == fileName:
            print(absPath)  # report the match, then delete it
            os.remove(absPath)
Ofstar v2.0 张道宁插件 v1.5
Ofstar v2.0 张道宁插件版 v1.0
On-line FTP Management Professional v1.22
On-line FTP Management Public v1.62
OsoonCRM v2.1
PHP Excel Parser Pro v4.2
php168整站(菁菁整站) v3.0 Beta For PHPWIND v2.x 版
php168整站(菁菁整站) v3.0 Beta For PHPWIND v3.x 版
php168整站(菁菁整站) v3.1 Build 0603
PhpMyDirectory v10.1.3
PHPMyStats v4.0.3 多国语言版
PhpSpy 2005(内含完整版和精简版)
PhpSpy 2006
php特洛伊PhpTr0y v1.0
pLog v1.0 简体中文版
QuiXplorer v2.3.1 多国语言版
R-Blog v1.5.3
SmartIPB v1.4.0.7 [D]
SmartIPB v1.4.0.8 [L] SP1
SupSite v2.3 商业版
SupSite v3.5 商业版
Tatter Tools v0.951 Utf-8 繁体中文正式版
VCard Pro v3.1 Build 4
VERYOK 实用统计 v3.0
VERYOK 实用统计 v4.0
Wimpy AV Player v2.1.5
Wimpy MP3 Player v4.1
最后送大家一首诗:
山高路远坑深,
大军纵横驰奔,
谁敢横刀立马?
惟有点赞加关注大军。