Python 数据分析基础小结 (4)

散点图:matplotlib.pyplot.scatter(x, y, s=None, c=None, marker=None, cmap=None, norm=None, vmin=None, vmax=None, alpha=None, linewidths=None, verts=None, edgecolors=None, hold=None, data=None,**kwargs)

折线图: matplotlib.pyplot.plot(*args, **kwargs)

柱状图:matplotlib.pyplot.bar(left, height, width=0.8, bottom=None, hold=None, data=None, **kwargs)

饼图:matplotlib.pyplot.pie(x, explode=None, labels=None, colors=None, autopct=None, pctdistance=0.6, shadow=False, labeldistance=1.1, startangle=None, radius=None, counterclock=True, wedgeprops=None, textprops=None, center=(0, 0), frame=False, hold=None, data=None)

箱线图:matplotlib.pyplot.boxplot(x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=None, bootstrap=None, usermedians=None, conf_intervals=None, meanline=None, showmeans=None, showcaps=None, showbox=None, showfliers=None, boxprops=None, labels=None, flierprops=None, medianprops=None, meanprops=None, capprops=None, whiskerprops=None, manage_xticks=True, autorange=False, zorder=None, hold=None, data=None)

# 5、Demo
# Aligning y-axis labels across subplots with set_label_coords.
import numpy as np
import matplotlib.pyplot as plt

box = dict(facecolor='yellow', pad=5, alpha=0.2)

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)
fig.subplots_adjust(left=0.2, wspace=0.6)

# Fixing random state for reproducibility
np.random.seed(19680801)

# Left column: labels are placed automatically, so they end up misaligned
# because the tick labels of the two axes have different widths.
ax1.plot(2000*np.random.rand(10))
ax1.set_title('ylabels not aligned')
ax1.set_ylabel('misaligned 1', bbox=box)
ax1.set_ylim(0, 2000)

ax3.set_ylabel('misaligned 2', bbox=box)
ax3.plot(np.random.rand(10))

labelx = -0.3  # axes coords

# Right column: the same x position (in axes coordinates) is forced on both
# labels, which lines them up.
ax2.set_title('ylabels aligned')
ax2.plot(2000*np.random.rand(10))
ax2.set_ylabel('aligned 1', bbox=box)
ax2.yaxis.set_label_coords(labelx, 0.5)
ax2.set_ylim(0, 2000)

ax4.plot(np.random.rand(10))
ax4.set_ylabel('aligned 2', bbox=box)
ax4.yaxis.set_label_coords(labelx, 0.5)

plt.show()


# 五、完整Demo
# Complete demo: build LRFMC features from airline data, standardise them,
# cluster with K-Means, then plot the cluster centres.
import numpy as np
import pandas as pd

# Load the airline data.
airline_data = pd.read_csv("../data/air_data.csv", encoding="gb18030")
print('原始数据的形状为:', airline_data.shape)

# Drop records whose fare is missing in either year.
exp1 = airline_data["SUM_YR_1"].notnull()
exp2 = airline_data["SUM_YR_2"].notnull()
exp = exp1 & exp2
airline_notnull = airline_data.loc[exp, :]
print('删除缺失记录后数据的形状为:', airline_notnull.shape)

# Keep only records with a non-zero fare in at least one of the two years
# that ALSO have total flown kilometres > 0 and a non-zero average discount.
index1 = airline_notnull['SUM_YR_1'] != 0
index2 = airline_notnull['SUM_YR_2'] != 0
index3 = (airline_notnull['SEG_KM_SUM'] > 0) & \
    (airline_notnull['avg_discount'] != 0)
airline = airline_notnull[(index1 | index2) & index3]
print('删除异常记录后数据的形状为:', airline.shape)

# The two date columns come first (they are only used to derive L); the
# remaining four are listed in R, F, M, C order so that, together with L,
# the feature columns line up with the `names` labels used for plotting.
airline_selection = airline[["FFP_DATE", "LOAD_TIME",
                             "LAST_TO_END", "FLIGHT_COUNT",
                             "SEG_KM_SUM", "avg_discount"]]

# Build the L feature: time between FFP_DATE and LOAD_TIME, in units of
# 30 days. `.dt.days` reads the day count directly instead of parsing the
# timedelta's string form. The series is named so every column label is a
# string (recent scikit-learn rejects mixed int/str column names).
L = pd.to_datetime(airline_selection["LOAD_TIME"]) - \
    pd.to_datetime(airline_selection["FFP_DATE"])
L = (L.dt.days / 30).rename("L")

# Merge the features: L followed by the four non-date columns.
airline_features = pd.concat([L, airline_selection.iloc[:, 2:]], axis=1)
print('构建的LRFMC特征前5行为:\n', airline_features.head())

from sklearn.preprocessing import StandardScaler

# Standardise every feature, then persist the scaled array.
data = StandardScaler().fit_transform(airline_features)
np.savez('../tmp/airline_scale.npz', data)
print('标准化后LRFMC五个特征为:\n', data[:5, :])

from sklearn.cluster import KMeans  # K-Means clustering

airline_scale = np.load('../tmp/airline_scale.npz')['arr_0']
k = 5  # number of cluster centres

# Build and train the model. `n_jobs` is not passed: the parameter was
# removed from KMeans in current scikit-learn releases. `n_init=10` pins the
# historical default so results do not depend on the installed version.
kmeans_model = KMeans(n_clusters=k, n_init=10, random_state=123)
fit_kmeans = kmeans_model.fit(airline_scale)

center = kmeans_model.cluster_centers_  # one row per cluster, one column per feature
labels = kmeans_model.labels_           # cluster label of every sample

# Count the samples in each cluster.
r1 = pd.Series(labels).value_counts()
print('最终每个类别的数目为:\n', r1)

# Feature labels, in the same order as the columns of `airline_features`.
names = ['入会时长', '最近乘坐过本公司航班', '乘坐次数', '里程', '平均折扣率']
n_features = len(names)

import matplotlib.pyplot as plt
# %matplotlib inline   # IPython/Jupyter magic; enable only inside a notebook
# NOTE: the Chinese labels below need a CJK-capable font to render, e.g.
# plt.rcParams['font.sans-serif'] = ['SimHei'] where that font is installed.

# Bar-chart matrix: one subplot per feature, one bar per cluster.
bar_fig = plt.figure(figsize=(8, 8))
for i, feature_name in enumerate(names):
    bar_ax = bar_fig.add_subplot(n_features, 1, i + 1)
    bar_ax.bar(range(k), center[:, i], width=0.5)
    bar_ax.set_xlabel('类别')
    bar_ax.set_ylabel(feature_name)
bar_fig.savefig('聚类分析柱形图.png')
plt.show()

# Radar chart: one closed polygon per cluster, one spoke per feature.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, polar=True)
angles = np.linspace(0, 2*np.pi, n_features, endpoint=False)
# Repeat the first angle at the end so each polygon closes on itself.
closed_angles = np.concatenate((angles, [angles[0]]))
Linecolor = ['bo-', 'r+:', 'gD--', 'yv-.', 'kp-']  # marker/line/colour per cluster
Fillcolor = ['b', 'r', 'g', 'y', 'k']
for i in range(k):
    values = np.concatenate((center[i], [center[i][0]]))  # close the polygon
    ax.plot(closed_angles, values, Linecolor[i], linewidth=2)         # outline
    ax.fill(closed_angles, values, facecolor=Fillcolor[i], alpha=0.25)  # fill
# Tick positions use the open angle list so the number of ticks equals the
# number of labels; a mismatch raises on current matplotlib.
ax.set_thetagrids(angles * 180/np.pi, names)
ax.set_title("客户分群雷达图", va='bottom')  # chart title
ax.set_rlim(-1, 3)  # radial range shared by all features
ax.grid(True)
plt.show()

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/zzdsds.html