文章目录
- 字节对象与字符转换
- 动态爬虫实施辅助工具
- python 部分内置方法示例:
- python虚拟环境的创建与激活
- 执行多个.py程序
- 获取当前系统默认浏览器名称
- 反编译小程序
- PC端小程序(wxapkg)解密:
- 反编译
字节对象与字符转换
data1 = b'abc'  # a bytes object
data2 = 'abc'  # a str object

# --- Converting between the two types ---
sb = data2.encode("utf8")  # str -> bytes, using the instance method
# Equivalent spelling through the bytes constructor:
sb = bytes(data2, "utf8")
bs = data1.decode("utf8")  # bytes -> str, using the instance method
# Equivalent spelling through the str constructor:
bs = str(data1, "utf8")
# NOTE: this also depends on the Python version; Python 2 and Python 3
# do not give the same results for these calls.
动态爬虫实施辅助工具
依赖 selenium:一个 Web 应用程序测试工具,转变思路后可用来爬取由 JS 动态渲染的网页
谷歌的 chromedriver 驱动(若要使用其它浏览器,需下载对应的 driver 驱动)
bs4 也就是BeautifulSoup 用来解析html
python 部分内置方法示例:
class People(object):
    """注解:人类"""
    # NOTE: the docstring above (roughly "Annotation: human") is kept verbatim
    # because it is observable at runtime through ``__doc__``.

    def __init__(self, name):
        # Initializer: remember the name on the instance.
        self.name = name

    def __del__(self):
        # Finalizer. The interpreter normally allocates and frees memory on
        # its own, so this hook is triggered by garbage collection rather
        # than called explicitly.
        pass

    def __str__(self):
        # Used when the object is printed, e.g. print(people).
        return "str函数被调用"

    def __repr__(self):
        # Used when the object itself is displayed, e.g. typing `people`.
        return "repr函数被调用"

    def __call__(self, *args, **kwargs):
        # Used when the object is called like a function, e.g. people().
        print("call函数被调用")

    # --- Index protocol: lets the object be used like a dictionary ---

    def __getitem__(self, item):
        # Read an attribute by key: people['name']
        return getattr(self, item)

    def __setitem__(self, key, value):
        # Write an attribute by key: people['name'] = 'Jack'
        setattr(self, key, value)

    def __delitem__(self, key):
        # Remove an attribute by key: del people['name']
        delattr(self, key)

    # --- Attribute protocol ---

    def __getattr__(self, item):
        # Accessing a missing attribute normally raises an error. When dotted
        # access names an attribute that is not defined, this method is called
        # with the attribute name as a string. Here 'age' yields 18 and every
        # other missing name yields None.
        return 18 if item == 'age' else None

    def __setattr__(self, key, value):
        # Intercepts every attribute assignment: once this method is defined,
        # `self.attr = value` becomes `self.__setattr__("attr", value)`.
        # Assigning with `self.attr = value` inside this method would call it
        # again and recurse without end until the stack overflows; store the
        # value through the instance dictionary (self.__dict__['name'] = value)
        # or, as done here, by delegating to the parent implementation.
        super().__setattr__(key, value)

    def __delattr__(self, key):
        # Attribute deletion is delegated to the parent implementation.
        super().__delattr__(key)
# Demonstration of the special attributes and methods of People.
# The trailing comment on each line shows the expected output.
people = People('Jack')
print(people.__doc__) # 注解:人类
print(People.__doc__) # 注解:人类
print(people.__module__) # __main__
print(People.__module__) # __main__
print(people.__class__) # <class '__main__.People'>
print(People.__class__) # <class 'type'>
print(people.__dict__) # {'name': 'Jack'}  (set by __init__ through __setattr__)
# Expected output of the print(People.__dict__) call below
# (the function addresses differ from run to run):
# {'__module__': '__main__', '__doc__': '注解:人类',
# '__init__': <function People.__init__ at 0x00000000037957B8>,
# '__del__': <function People.__del__ at 0x000000000D8A2A60>,
# '__str__': <function People.__str__ at 0x000000000D8A2AE8>,
# '__repr__': <function People.__repr__ at 0x000000000D8A2B70>,
# '__call__': <function People.__call__ at 0x000000000D8A2BF8>,
# '__getitem__': <function People.__getitem__ at 0x000000000D8A2C80>,
# '__setitem__': <function People.__setitem__ at 0x000000000D8A2D08>,
# '__delitem__': <function People.__delitem__ at 0x000000000D8A2D90>,
# '__getattr__': <function People.__getattr__ at 0x000000000D8A2E18>,
# '__setattr__': <function People.__setattr__ at 0x000000000D8A2EA0>,
# '__delattr__': <function People.__delattr__ at 0x000000000D8A2F28>,
# '__dict__': <attribute '__dict__' of 'People' objects>,
# '__weakref__': <attribute '__weakref__' of 'People' objects>}
print(People.__dict__)
print(People) # <class '__main__.People'>
print(people) # str函数被调用
people # prints nothing in a script: a bare expression is only echoed (via __repr__) by the interactive interpreter
people() # call函数被调用
print(people['name']) # Jack
people['name'] = 'Tom'
print(people['name']) # Tom
del people['name']
print(people['name']) # None  (the attribute is gone, so __getattr__ answers and returns None)
print(people.age) # 18  (supplied by __getattr__)
参考:https://www.jianshu.com/p/401eaf3941c1
python虚拟环境的创建与激活
系统环境:Python 3.7、Windows 10,使用 Python 自带的 venv 模块
python -m venv [虚拟环境名称]
会在当前目录下生成一个虚拟环境同名的文件夹
激活虚拟环境
cd [虚拟环境名称]\Scripts
./activate
## 有些可以直接执行[虚拟环境名称]\Scripts\activate
## 但我在windows 下的 powershell报错,就按上面的方式执行了
关闭环境
## 同激活环境一样不过执行的是 deactivate
./deactivate
激活成功后,命令行提示符前面会出现虚拟环境名称的标识,如:(虚拟环境名称) PS C:\>
执行多个.py程序
import os

# Launch each program through the system shell, one after another, in the
# order listed. Append further commands to the tuple to run more scripts.
for command in ("python one.py", "python two.py", "python three.py"):
    os.system(command)
获取当前系统默认浏览器名称
此代码通过查询 Windows 系统的注册表来获取默认浏览器名称(仅适用于 Windows)
from winreg import HKEY_CURRENT_USER, OpenKey, QueryValueEx

# Per-user registry key holding the user's choice for the "https" URL
# association; its ProgId value is what gets printed below.
reg_path = r'Software\Microsoft\Windows\Shell\Associations\UrlAssociations\https\UserChoice'

with OpenKey(HKEY_CURRENT_USER, reg_path) as key:
    # QueryValueEx returns a (value, registry type) pair; only the value is shown.
    prog_id, _value_type = QueryValueEx(key, 'ProgId')
    print(prog_id)
反编译小程序
PC端小程序(wxapkg)解密:
#!/usr/bin/env python3
import hashlib
import struct
from Crypto.Cipher import AES
# 解密pc中拷出来的微信小程序
def unpad(s):
    """Strip trailing padding whose length is stored in the last byte.

    Args:
        s: A bytes-like buffer whose final byte gives the number of padding
            bytes to remove from the end.

    Returns:
        The buffer without its padding. An empty buffer is returned
        unchanged instead of raising IndexError on ``s[-1]``.
    """
    if not s:
        # Nothing to inspect: there is no last byte to read the length from.
        return s
    return s[:len(s) - s[-1]]
# AES-CBC decryption. The AES variant follows from the length of the key
# that is passed in (the caller in this file derives a 32-byte key).
def aescbcDecrypt(src, key, iv):
    """Decrypt *src* with AES in CBC mode and remove the trailing padding.

    Args:
        src: Ciphertext bytes.
        key: AES key bytes.
        iv: Initialization vector bytes.

    Returns:
        The decrypted bytes after being passed through ``unpad``.
    """
    cipher = AES.new(key, AES.MODE_CBC, iv)
    return unpad(cipher.decrypt(src))
def decdata(encfile, wxid):
    """Decrypt a mini-program package copied from the PC client.

    The input must start with the 6-byte marker ``V1MMWX``. The first 1024
    bytes after the marker are decrypted with AES-CBC, using a 32-byte key
    derived from *wxid* (PBKDF2-HMAC-SHA1, salt ``saltiest``, 1000 rounds)
    and a fixed IV. Every remaining byte is XOR-ed with the second-to-last
    byte of the encoded *wxid* (``0x66`` when *wxid* is shorter than two
    bytes). The result is written to a file named ``dec_<basename>`` in the
    same directory as *encfile*.

    Args:
        encfile: Path of the encrypted package.
        wxid: Mini-program id used to derive the keys.

    Returns:
        0 on success, -1 if the file is shorter than the marker, -2 if the
        marker is missing.
    """
    # Local import: the surrounding script does not import ``os`` itself.
    import os

    magic = 'V1MMWX'.encode('utf-8')
    aes_span = 1024  # only this many bytes after the marker are AES-encrypted

    # Read everything up front so the input handle is released immediately.
    with open(encfile, 'rb') as file:
        data = file.read()

    if len(data) < len(magic):
        print('file too small')
        return -1
    if not data.startswith(magic):
        print('file format error')
        return -2

    body = data[len(magic):]
    aesdata = body[:aes_span]
    xordata = body[aes_span:]  # empty when the file has no XOR-ed tail

    wxid_bytes = wxid.encode('utf-8')
    dk = hashlib.pbkdf2_hmac(
        'sha1', wxid_bytes, 'saltiest'.encode('utf-8'), 1000, 32)
    iv = 'the iv: 16 bytes'.encode('utf-8')
    decout1 = aescbcDecrypt(aesdata, dk, iv)

    # The key is indexed on the encoded bytes, so the length check is done on
    # the bytes as well (identical for the usual ASCII ids).
    xorkey = wxid_bytes[-2] if len(wxid_bytes) >= 2 else 0x66
    decout2 = bytes(b ^ xorkey for b in xordata)

    # Prefix the file name only, so inputs given with a directory part
    # ("some/dir/APP.wxapkg") get a valid output path next to the input.
    folder, base = os.path.split(encfile)
    outfile = os.path.join(folder, 'dec_' + base)
    with open(outfile, 'wb') as poutf:
        poutf.write(decout1)
        poutf.write(decout2)

    print('decrypt ok,write file', outfile)
    return 0
if __name__ == '__main__':
    # Script entry point: edit the package file name and the mini-program id
    # below, then run this file directly.
    print('start')
    fname, wxid = 'APP2.wxapkg', 'wxc07f9d67923d676d'
    decdata(encfile=fname, wxid=wxid)
反编译
# coding: utf-8
# py2 origin author lrdcq
# usage python3 unwxapkg.py filename
__author__ = 'Integ: https://github.com./integ'
import sys, os
import struct
class WxapkgFile(object):
    """One entry of the file index stored in a wxapkg archive."""

    nameLen = 0  # byte length of the encoded entry name
    name = ""  # entry name; holds the raw bytes once read from the index
    offset = 0  # position passed to seek() to reach the entry's data
    size = 0  # number of data bytes belonging to the entry


class WxapkgFormatError(Exception):
    """Raised when the input does not carry a valid wxapkg header."""


def _read_u32(stream):
    """Read one big-endian unsigned 32-bit integer from *stream*.

    Raises struct.error when fewer than four bytes are left.
    """
    return struct.unpack('>L', stream.read(4))[0]


def _is_inside(base, candidate):
    """Return True when *candidate* is a path strictly below *base*."""
    try:
        return (candidate != base
                and os.path.commonpath([base, candidate]) == base)
    except ValueError:
        # commonpath refuses paths it cannot compare (e.g. different drives).
        return False


def unpack_wxapkg(filename, output_dir=None):
    """Extract every file stored in the wxapkg archive *filename*.

    Args:
        filename: Path of the archive to unpack.
        output_dir: Directory name for the extracted files, resolved relative
            to the archive's own directory. Defaults to the archive file name
            followed by ``_dir``.

    Returns:
        The list of paths that were written, in index order.

    Raises:
        WxapkgFormatError: The header is too short or its marker bytes
            (0xBE first, 0xED last) do not match.
    """
    with open(filename, "rb") as f:
        root = os.path.dirname(os.path.realpath(f.name))
        name = os.path.basename(f.name) + '_dir'
        if output_dir is not None:
            name = output_dir
        out_root = os.path.normpath(os.path.join(root, name))

        # read header: mark(1) info1(4) indexInfoLength(4) bodyInfoLength(4) mark(1)
        header = f.read(14)
        if len(header) < 14:
            raise WxapkgFormatError('its not a wxapkg file!!!!!')
        (firstMark, info1, indexInfoLength,
         bodyInfoLength, lastMark) = struct.unpack('>BLLLB', header)
        print('first header mark = {}'.format(firstMark))
        print('info1 = {}'.format(info1))
        print('indexInfoLength = {}'.format(indexInfoLength))
        print('bodyInfoLength = {}'.format(bodyInfoLength))
        print('last header mark = {}'.format(lastMark))
        if firstMark != 0xBE or lastMark != 0xED:
            raise WxapkgFormatError('its not a wxapkg file!!!!!')

        try:
            fileCount = _read_u32(f)
        except struct.error:
            raise WxapkgFormatError('its not a wxapkg file!!!!!') from None
        print('fileCount = {}'.format(fileCount))

        # read index
        fileList = []
        for _ in range(fileCount):
            data = WxapkgFile()
            try:
                data.nameLen = _read_u32(f)
                data.name = f.read(data.nameLen)
                data.offset = _read_u32(f)
                data.size = _read_u32(f)
            except struct.error:
                # The index ended early. Later reads cannot succeed either,
                # so stop instead of retrying once per announced entry.
                print('index truncated after {} entries'.format(len(fileList)))
                break
            fileList.append(data)

        # save files
        written = []
        for d in fileList:
            relative = str(d.name, encoding="utf-8").lstrip('/')
            target = os.path.normpath(os.path.join(out_root, relative))
            # Entry names come from the archive; never let one such as
            # "/../x" place a file outside the output directory.
            if not _is_inside(out_root, target):
                print('skip unsafe path = {}'.format(relative))
                continue
            os.makedirs(os.path.dirname(target), exist_ok=True)
            f.seek(d.offset)
            with open(target, 'wb') as w:
                w.write(f.read(d.size))
            print('writeFile = {}'.format(target))
            written.append(target)
        return written


def main(argv=None):
    """Command-line entry point: ``unwxapkg.py filename [output_dir]``.

    Exits with status 1 when the arguments are missing or the input is not
    a wxapkg file.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('usage: unwxapkg.py filename [output_dir]')
        sys.exit(1)
    output_dir = argv[2] if len(argv) > 2 else None
    try:
        unpack_wxapkg(argv[1], output_dir)
    except WxapkgFormatError as err:
        print(err)
        sys.exit(1)


if __name__ == '__main__':
    main()
使用方式:
pc端的wxapkg:先解密==》再反编译解包
手机中获取的wxapkg:直接反编译
解密:修改文件中的wxid 和wxapkg路径 直接执行文件
反编译:执行时wxapkg路径加在执行语句后面
示例:
python zhao.py dec_APP2.wxapkg
注意:python的反编译没生效,用nodejs 来反编译