import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
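# The three source DataFrames are assumed to exist already. A minimal loading
# sketch (the CSV file names below are hypothetical placeholders, not from the
# original):
# df_customers = pd.read_csv('customers.csv')
# df_orders = pd.read_csv('orders.csv')
# df_order_items = pd.read_csv('order_items.csv')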
# Load datasets (assuming df_customers, df_orders, df_order_items are already defined)
customers = df_customers
orders = df_orders
order_items = df_order_items
# Merge datasets to bring customer, order, and item information together
merged_data = pd.merge(orders, customers, on='customer_id')
merged_data = pd.merge(merged_data, order_items, on='order_id')
# Calculate RFM metrics for each customer
# Recency: days since the last order
recency = merged_data.groupby('customer_id')['order_purchase_timestamp'].max().reset_index()
recency['recency'] = (pd.to_datetime('2024-01-01') - pd.to_datetime(recency['order_purchase_timestamp'])).dt.days
recency = recency[['customer_id', 'recency']]
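# Note: the snapshot date above is hard-coded to 2024-01-01. A common
# alternative (an assumption, not from the original) is to derive it from the
# data itself, e.g.:
# snapshot = pd.to_datetime(merged_data['order_purchase_timestamp']).max() + pd.Timedelta(days=1)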
# Frequency: number of orders per customer
frequency = merged_data.groupby('customer_id')['order_id'].nunique().reset_index()
frequency.columns = ['customer_id', 'frequency']
# Monetary: total spending per customer
monetary = merged_data.groupby('customer_id')['price'].sum().reset_index()
monetary.columns = ['customer_id', 'monetary']
# Merge the RFM metrics into a single table
rfm = pd.merge(recency, frequency, on='customer_id')
rfm = pd.merge(rfm, monetary, on='customer_id')
# Define CLV: lifetime value based on historical data (sum of monetary values)
clv_data = rfm.copy()
clv_data['CLV'] = clv_data['monetary']
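# Caveat: with CLV defined as the historical 'monetary' total, the target is
# identical to one of the features, so any model can fit it near-perfectly. A
# forward-looking target (e.g., spend over a future holdout window) would be
# needed for a genuine prediction task; the code below follows the historical
# definition as given.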
# Prepare the data for modeling
X = clv_data[['recency', 'frequency', 'monetary']]
y = clv_data['CLV']
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
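# Optional (an assumption, not in the original): the linear models and SVR
# below are scale-sensitive, so standardized copies of the features could be
# swapped in for those estimators.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # fit on training data only
X_test_scaled = scaler.transform(X_test)        # reuse training statistics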
# Initialize the Random Forest regressor
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
# Train the Random Forest model
print('Training Random Forest Regressor model...')
rf_model.fit(X_train, y_train)
# Predict CLV on the test set
y_pred_rf = rf_model.predict(X_test)
# Evaluate the Random Forest model
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f'Mean Squared Error for Random Forest Regressor model: {mse_rf:.2f}')
# Initialize the Gradient Boosting regressor
gb_model = GradientBoostingRegressor(random_state=42)
# Train the Gradient Boosting model
print('Training Gradient Boosting Regressor model...')
gb_model.fit(X_train, y_train)
# Predict CLV on the test set
y_pred_gb = gb_model.predict(X_test)
# Evaluate the Gradient Boosting model
mse_gb = mean_squared_error(y_test, y_pred_gb)
print(f'Mean Squared Error for Gradient Boosting Regressor model: {mse_gb:.2f}')
# Initialize the XGBoost regressor
xgb_model = XGBRegressor(random_state=42)
# Train the XGBoost model
print('Training XGBoost Regressor model...')
xgb_model.fit(X_train, y_train)
# Predict CLV on the test set
y_pred_xgb = xgb_model.predict(X_test)
# Evaluate the XGBoost model
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
print(f'Mean Squared Error for XGBoost Regressor model: {mse_xgb:.2f}')
# Initialize the Linear Regression model
lr_model = LinearRegression()
# Train the Linear Regression model
print('Training Linear Regression model...')
lr_model.fit(X_train, y_train)
# Predict CLV on the test set
y_pred_lr = lr_model.predict(X_test)
# Evaluate the Linear Regression model
mse_lr = mean_squared_error(y_test, y_pred_lr)
print(f'Mean Squared Error for Linear Regression model: {mse_lr:.2f}')
# Initialize the Ridge Regression model
ridge_model = Ridge()
# Train the Ridge Regression model
print('Training Ridge Regression model...')
ridge_model.fit(X_train, y_train)
# Predict CLV on the test set
y_pred_ridge = ridge_model.predict(X_test)
# Evaluate the Ridge Regression model
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
print(f'Mean Squared Error for Ridge Regression model: {mse_ridge:.2f}')
# Initialize the Lasso Regression model
lasso_model = Lasso()
# Train the Lasso Regression model
print('Training Lasso Regression model...')
lasso_model.fit(X_train, y_train)
# Predict CLV on the test set
y_pred_lasso = lasso_model.predict(X_test)
# Evaluate the Lasso Regression model
mse_lasso = mean_squared_error(y_test, y_pred_lasso)
print(f'Mean Squared Error for Lasso Regression model: {mse_lasso:.2f}')
# Initialize the Elastic Net Regression model
elastic_net_model = ElasticNet()
# Train the Elastic Net Regression model
print('Training Elastic Net Regression model...')
elastic_net_model.fit(X_train, y_train)
# Predict CLV on the test set
y_pred_elastic_net = elastic_net_model.predict(X_test)
# Evaluate the Elastic Net Regression model
mse_elastic_net = mean_squared_error(y_test, y_pred_elastic_net)
print(f'Mean Squared Error for Elastic Net Regression model: {mse_elastic_net:.2f}')
# Initialize the Support Vector Regression model with a linear kernel
svr_model = SVR(kernel='linear')
# Train the Support Vector Regression model
print('Training Support Vector Regression (Linear Kernel) model...')
svr_model.fit(X_train, y_train)
# Predict CLV on the test set
y_pred_svr = svr_model.predict(X_test)
# Evaluate the Support Vector Regression model
mse_svr = mean_squared_error(y_test, y_pred_svr)
print(f'Mean Squared Error for Support Vector Regression (Linear Kernel) model: {mse_svr:.2f}')
# Plot actual vs. predicted CLV for each model
plt.figure(figsize=(14, 10))
# Random Forest Regressor
plt.subplot(3, 3, 1)
plt.scatter(y_test, y_pred_rf, color='blue', label='Random Forest')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k', lw=2)
plt.xlabel('Actual CLV')
plt.ylabel('Predicted CLV')
plt.title('Random Forest Regressor')
plt.legend()
plt.grid(True)
# Gradient Boosting Regressor
plt.subplot(3, 3, 2)
plt.scatter(y_test, y_pred_gb, color='green', label='Gradient Boosting')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k', lw=2)
plt.xlabel('Actual CLV')
plt.ylabel('Predicted CLV')
plt.title('Gradient Boosting Regressor')
plt.legend()
plt.grid(True)
# XGBoost Regressor
plt.subplot(3, 3, 3)
plt.scatter(y_test, y_pred_xgb, color='red', label='XGBoost')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k', lw=2)
plt.xlabel('Actual CLV')
plt.ylabel('Predicted CLV')
plt.title('XGBoost Regressor')
plt.legend()
plt.grid(True)
# Linear Regression
plt.subplot(3, 3, 4)
plt.scatter(y_test, y_pred_lr, color='cyan', label='Linear Regression')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k', lw=2)
plt.xlabel('Actual CLV')
plt.ylabel('Predicted CLV')
plt.title('Linear Regression')
plt.legend()
plt.grid(True)
# Ridge Regression
plt.subplot(3, 3, 5)
plt.scatter(y_test, y_pred_ridge, color='magenta', label='Ridge Regression')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k', lw=2)
plt.xlabel('Actual CLV')
plt.ylabel('Predicted CLV')
plt.title('Ridge Regression')
plt.legend()
plt.grid(True)
# Lasso Regression
plt.subplot(3, 3, 6)
plt.scatter(y_test, y_pred_lasso, color='yellow', label='Lasso Regression')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k', lw=2)
plt.xlabel('Actual CLV')
plt.ylabel('Predicted CLV')
plt.title('Lasso Regression')
plt.legend()
plt.grid(True)
# Elastic Net Regression
plt.subplot(3, 3, 7)
plt.scatter(y_test, y_pred_elastic_net, color='purple', label='Elastic Net Regression')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k', lw=2)
plt.xlabel('Actual CLV')
plt.ylabel('Predicted CLV')
plt.title('Elastic Net Regression')
plt.legend()
plt.grid(True)
# Support Vector Regression (Linear Kernel)
plt.subplot(3, 3, 8)
plt.scatter(y_test, y_pred_svr, color='orange', label='SVR (Linear Kernel)')
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], '--k', lw=2)
plt.xlabel('Actual CLV')
plt.ylabel('Predicted CLV')
plt.title('SVR (Linear Kernel)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
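# Summary comparison (an added sketch, not in the original): rank the models
# by their test-set MSE using the scores computed above.
mse_scores = {
    'Random Forest': mse_rf,
    'Gradient Boosting': mse_gb,
    'XGBoost': mse_xgb,
    'Linear Regression': mse_lr,
    'Ridge': mse_ridge,
    'Lasso': mse_lasso,
    'Elastic Net': mse_elastic_net,
    'SVR (Linear Kernel)': mse_svr,
}
for name, mse in sorted(mse_scores.items(), key=lambda kv: kv[1]):
    print(f'{name}: MSE = {mse:.2f}')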