import pandas as pd
import numpy as np
# Location of the exported CSV. Both values are machine-specific; adjust them
# before running this script elsewhere.
downpath = '/Users/kangyongqing/Downloads/'
downfile = '20230725_105033.csv'

dd = pd.read_csv(downpath + downfile)
dd.rename(columns={'student_user_id': '学生id'}, inplace=True)

# First payment time per student: the minimum '付费时间' within each '学生id'
# group, broadcast back onto every row of that student. `transform` keeps the
# original row index, so the result lines up with `dd` for the concat below.
# This replaces a per-row loop that re-filtered the whole frame for each row.
# NOTE(review): if '付费时间' is stored as text, `min` compares strings, which
# is only chronological for zero-padded ISO-like timestamps — confirm format.
result = dd.groupby('学生id')['付费时间'].transform('min').to_frame('首付费时间')
dd1 = pd.concat((dd, result), axis=1)

# 'xulie' is the position of each record within its student's payments,
# ordered by '付费时间' ascending; method='first' breaks ties by row order,
# so exactly one row per student gets rank 1 (that student's first order).
dd1['xulie'] = dd1.groupby('학生id'.replace('학생', '学生'))['付费时间'].rank(method='first', ascending=True) if False else dd1.groupby('学生id')['付费时间'].rank(method='first', ascending=True)

# Quick visual check of the first two rows, transposed for readability.
print(dd1.head(2).T)

# Store the long ids as strings (via int64 to drop any float formatting).
# Presumably this keeps the full digits intact in the spreadsheet — confirm.
# astype(np.int64) raises if an id is missing (NaN).
dd1['学生id'] = dd1['学生id'].astype(np.int64).astype(str)
dd1['parent_user_id'] = dd1['parent_user_id'].astype(np.int64).astype(str)

dd1.to_excel(downpath + 'ceshi.xlsx')
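# Key points:
# - computing each student's first payment time;
# - finding a user's first order record via rank-based ordering;
# - converting long ids to strings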