客户细分要解决的问题，是按照客户之间的相似特征把客户划分为不同的群体。这个问题的前提是：没有现成的客户分类列表可供使用，只有客户的人物画像。
数据集已有的数据是公司的历史商业活动记录以及客户的购买记录。
offer.csv:
transaction.csv:
Customer Last Name,Offer # Smith,2 Smith,24 Johnson,17 Johnson,24 Johnson,26 Williams,18 Williams,22 Williams,31 Brown,7 Brown,29 Brown,30 Jones,8 Miller,6 Miller,10 Miller,14 Miller,15 Miller,22 Miller,23 Miller,31 Davis,12 Davis,22 Davis,25 Garcia,14 Garcia,15 Rodriguez,2 Rodriguez,26 Wilson,8 Wilson,30 Martinez,12 Martinez,25 Martinez,28 Anderson,24 Anderson,26 Taylor,7 Taylor,18 Taylor,29 Taylor,30 Thomas,1 Thomas,4 Thomas,9 Thomas,11 Thomas,14 Thomas,26 Hernandez,28 Hernandez,29 Moore,17 Moore,24 Martin,2 Martin,11 Martin,28 Jackson,1 Jackson,2 Jackson,11 Jackson,15 Jackson,22 Thompson,9 Thompson,16 Thompson,25 Thompson,30 White,14 White,22 White,25 White,30 Lopez,9 Lopez,11 Lopez,15 Lopez,16 Lopez,27 Lee,3 Lee,4 Lee,6 Lee,22 Lee,27 Gonzalez,9 Gonzalez,31 Harris,4 Harris,6 Harris,7 Harris,19 Harris,22 Harris,27 Clark,4 Clark,11 Clark,28 Clark,31 Lewis,7 Lewis,8 Lewis,30 Robinson,7 Robinson,29 Walker,18 Walker,29 Perez,18 Perez,30 Hall,11 Hall,22 Young,6 Young,9 Young,15 Young,22 Young,31 Young,32 Allen,9 Allen,27 Sanchez,4 Sanchez,5 Sanchez,14 Sanchez,15 Sanchez,20 Sanchez,22 Sanchez,26 Wright,4 Wright,6 Wright,21 Wright,27 King,7 King,13 King,18 King,29 Scott,6 Scott,14 Scott,23 Green,7 Baker,7 Baker,10 Baker,19 Baker,31 Adams,18 Adams,29 Adams,30 Nelson,3 Nelson,4 Nelson,8 Nelson,31 Hill,8 Hill,13 Hill,18 Hill,30 Ramirez,9 Campbell,2 Campbell,24 Campbell,26 Mitchell,1 Mitchell,2 Roberts,31 Carter,7 Carter,13 Carter,29 Carter,30 Phillips,17 Phillips,24 Evans,22 Evans,27 Turner,4 Turner,6 Turner,27 Turner,31 Torres,8 Parker,11 Parker,16 Parker,20 Parker,29 Parker,31 Collins,11 Collins,30 Edwards,8 Edwards,27 Stewart,8 Stewart,29 Stewart,30 Flores,17 Flores,24 Morris,17 Morris,24 Morris,26 Nguyen,19 Nguyen,31 Murphy,7 Murphy,12 Rivera,7 Rivera,18 Cook,24 Cook,26 Rogers,3 Rogers,7 Rogers,8 Rogers,19 Rogers,21 Rogers,22 Morgan,8 Morgan,29 Peterson,1 Peterson,2 Peterson,10 Peterson,23 Peterson,26 Peterson,27 Cooper,4 Cooper,16 Cooper,20 Cooper,32 Reed,5 Reed,14 
Bailey,7 Bailey,30 Bell,2 Bell,17 Bell,24 Bell,26 Gomez,11 Gomez,20 Gomez,25 Gomez,32 Kelly,6 Kelly,20 Kelly,31 Kelly,32 Howard,11 Howard,12 Howard,22 Ward,4 Cox,2 Cox,17 Cox,24 Cox,26 Diaz,7 Diaz,8 Diaz,29 Diaz,30 Richardson,3 Richardson,6 Richardson,22 Wood,1 Wood,10 Wood,14 Wood,31 Watson,7 Watson,29 Brooks,3 Brooks,8 Brooks,11 Brooks,22 Bennett,8 Bennett,29 Gray,12 Gray,16 Gray,26 James,7 James,8 James,13 James,18 James,30 Reyes,9 Reyes,23 Cruz,29 Cruz,30 Hughes,7 Hughes,8 Hughes,13 Hughes,29 Hughes,30 Price,1 Price,22 Price,30 Price,31 Myers,18 Myers,30 Long,3 Long,7 Long,10 Foster,1 Foster,9 Foster,14 Foster,22 Foster,23 Sanders,1 Sanders,4 Sanders,5 Sanders,6 Sanders,9 Sanders,11 Sanders,20 Sanders,25 Sanders,26 Ross,18 Ross,21 Morales,6 Morales,7 Morales,8 Morales,19 Morales,22 Morales,31 Powell,5 Sullivan,8 Sullivan,13 Sullivan,18 Russell,26 Ortiz,8 Jenkins,24 Jenkins,26 Gutierrez,6 Gutierrez,8 Gutierrez,10 Gutierrez,18 Perry,8 Perry,18 Perry,29 Perry,30 Butler,1 Butler,4 Butler,22 Butler,28 Butler,30 Barnes,10 Barnes,21 Barnes,22 Barnes,31 Fisher,1 Fisher,2 Fisher,11 Fisher,22 Fisher,28 Fisher,30 Fisher,31
预处理需要对两个数据集做关联处理，这样才能得到单一的视图。同时，由于需要比较各个客户所产生的交易，还需要建立一张透视表：行代表客户，列代表商业活动，单元格的值表示该客户是否有购买行为。
// Load the two input collections from their CSV sources.
var offers = Offer.ReadFromCsv(_offersCsv);
var transactions = Transaction.ReadFromCsv(_transactionsCsv);

// Inner-join offers with transactions on OfferId so that each purchase row
// carries the attributes of the offer it belongs to. Count is a constant 1
// per joined row.
var clusterData = offers
    .Join(
        transactions,
        offer => offer.OfferId,
        purchase => purchase.OfferId,
        (offer, purchase) => new
        {
            offer.OfferId,
            offer.Campaign,
            offer.Discount,
            purchase.LastName,
            offer.LastPeak,
            offer.Minimum,
            offer.Origin,
            offer.Varietal,
            Count = 1,
        })
    .ToArray();

// Total number of offers; handed to ToFeatures for every customer
// (presumably the width of the feature vector — confirm against ToFeatures).
var count = offers.Count();

// Pivot step: one PivotData per distinct LastName. For each customer the
// joined rows are indexed by OfferId (values are the per-row Count) and the
// resulting lookup is converted into that customer's Features.
var pivotDataArray = clusterData
    .GroupBy(row => row.LastName)
    .Select(customerRows =>
    {
        var lookup = customerRows.ToLookup(r => r.OfferId, r => r.Count);
        return new PivotData()
        {
            LastName = customerRows.Key,
            Features = ToFeatures(lookup, count)
        };
    })
    .ToArray();