KMeans Clustering
Saving and loading a KMeans model using pickle:
```python
import pickle
from flask import Flask, request, jsonify
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity

app = Flask(__name__)

# Example list of business processes to cluster.
business_processes = [
    "Manage Trades",
    "Customer Relationship Management",
    "Inventory Management",
    "Order Fulfillment",
]

# Vectorize the process names using count vectorization (bag of words).
count_vectorizer = CountVectorizer()
count_matrix = count_vectorizer.fit_transform(business_processes)

# Apply K-means clustering.
# NOTE(review): using one cluster per process puts each process in its own
# cluster -- confirm this is the intended behaviour.
num_clusters = len(business_processes)
kmeans = KMeans(n_clusters=num_clusters)
kmeans.fit(count_matrix)

# Save the fitted KMeans model to a pickle file so it can be reused.
with open('kmeans_model.pkl', 'wb') as f:
    pickle.dump(kmeans, f)

# Load the KMeans model back from the pickle file (demonstrates reuse across
# application restarts without retraining).
# SECURITY: pickle.load executes arbitrary code -- only ever unpickle files
# this application wrote itself, never untrusted input.
with open('kmeans_model.pkl', 'rb') as f:
    kmeans = pickle.load(f)
# Function to calculate similarity between an input sentence and the
# vectorized business processes.
def similarity_to_clusters(input_sentence, business_processes, kmeans, count_vectorizer):
    """Return cosine similarities between *input_sentence* and each row of
    the module-level ``count_matrix`` (one score per business process).

    NOTE(review): despite the signature, ``business_processes`` and
    ``kmeans`` are unused here and the comparison is against the raw count
    matrix rather than the cluster centroids -- confirm this is intended.
    """
    input_sentence_vector = count_vectorizer.transform([input_sentence])
    similarities = cosine_similarity(input_sentence_vector, count_matrix)[0]
    return similarities


# Minimum cosine similarity required to report a match. Adjust as needed.
threshold = 0.5
@app.route('/api/match_business_process', methods=['POST'])
def match_business_process():
    """POST endpoint: match a JSON ``sentence`` to a business process.

    Expects a JSON body like ``{"sentence": "..."}`` and returns the best
    matching business process whose cosine similarity exceeds ``threshold``,
    or a "not found" message otherwise.
    """
    # Get the input sentence from the request body.
    input_sentence = request.json.get('sentence')
    # Similarity of the input against every known business process.
    cluster_similarities = similarity_to_clusters(
        input_sentence, business_processes, kmeans, count_vectorizer
    )
    # Find the best-scoring business process above the threshold.
    top_matched_process = None
    max_similarity = 0
    for i, similarity in enumerate(cluster_similarities):
        if similarity > threshold and similarity > max_similarity:
            top_matched_process = business_processes[i]
            max_similarity = similarity
    if top_matched_process:
        response = {"matched_business_process": top_matched_process}
    else:
        response = {"matched_business_process": "No matching business process found."}
    return jsonify(response)


if __name__ == '__main__':
    app.run(debug=True)
```
In this modified code:
- We save the KMeans model to a pickle file named `'kmeans_model.pkl'` after fitting it.
- When the Flask application starts, it loads the KMeans model from the pickle file.
- This way, the KMeans model can be reused across different instances of the Flask application without needing to retrain it every time.
Logistic Regression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Sample data (replace with your actual data).
# NOTE: plain LogisticRegression is a single-label classifier, so each
# sentence gets exactly one integer class; multi-label targets like
# [1, 0, 1] would raise at fit time -- see the MultiOutputClassifier
# variant below for multi-label classification.
sentences = [
    "open a new trade for the client",
    "update the customer contact details",
    "check remaining stock in the warehouse",
    "ship the customer order today",
    "close the pending trade position",
    "log a new customer complaint",
    "reorder low inventory items",
    "pack and dispatch the order",
    "review trade settlement status",
    "archive inactive customer accounts",
]
labels = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1]  # one class index per sentence

# Feature extraction: TF-IDF turns each sentence into a sparse vector.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(sentences)

# Split data into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# Train the classifier.
classifier = LogisticRegression()
classifier.fit(X_train, y_train)

# Evaluate the model on the held-out test set.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

# Prediction example.
new_sentence = "New sentence text"
new_sentence_vec = vectorizer.transform([new_sentence])
predicted_label = classifier.predict(new_sentence_vec)
print("Predicted label:", predicted_label)
Logistic Regression with a multi-label classifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import MultiLabelBinarizer

# Sample data (replace with your actual data). Each entry in ``labels`` is
# the SET of entity groups present in the corresponding sentence.
# NOTE: MultiLabelBinarizer expects label sets (e.g. ["trades", "orders"]),
# NOT pre-binarized indicator rows like [1, 0, 1] -- feeding it indicator
# rows makes it treat the integers 0 and 1 themselves as the class labels.
sentences = [
    "open a new trade for the client",
    "update the customer contact details and reorder stock",
    "check remaining stock in the warehouse",
    "ship the customer order today",
    "close the pending trade and notify the customer",
    "log a new customer complaint",
    "reorder low inventory items for fulfilment",
    "pack and dispatch the order",
]
labels = [
    ["trades"],
    ["customers", "inventory"],
    ["inventory"],
    ["customers", "orders"],
    ["trades", "customers"],
    ["customers"],
    ["inventory", "orders"],
    ["orders"],
]

# Feature extraction.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(sentences)

# Multi-label binarization: one indicator column per entity group.
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(labels)

# Split data into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a multi-label classifier (binary relevance: one independent
# LogisticRegression per label column).
classifier = MultiOutputClassifier(LogisticRegression())
classifier.fit(X_train, y_train)

# Evaluate the model. NOTE: accuracy_score on multi-label output is strict
# *subset* accuracy -- a sample counts only if every label matches exactly.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred, target_names=mlb.classes_))

# Prediction example.
new_sentence = "New sentence text"
new_sentence_vec = vectorizer.transform([new_sentence])
predicted_labels = classifier.predict(new_sentence_vec)
predicted_entities = mlb.inverse_transform(predicted_labels)
print("Predicted entities:", predicted_entities)
Random Forest with multi-label classification
from sklearn.ensemble import RandomForestClassifier

# Train a multi-label classifier (binary relevance: one RandomForest per
# label column). Reuses X_train/y_train, vectorizer and mlb from the
# Logistic Regression example above.
classifier = MultiOutputClassifier(RandomForestClassifier())
classifier.fit(X_train, y_train)

# Evaluate the model (accuracy here is strict subset accuracy).
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred, target_names=mlb.classes_))

# Prediction example.
new_sentence = "New sentence text"
new_sentence_vec = vectorizer.transform([new_sentence])
predicted_labels = classifier.predict(new_sentence_vec)
predicted_entities = mlb.inverse_transform(predicted_labels)
print("Predicted entities:", predicted_entities)
SVM with multi-label classification
from sklearn.svm import SVC

# Train a multi-label classifier (binary relevance: one SVM per label
# column). Reuses X_train/y_train, vectorizer and mlb from the Logistic
# Regression example above.
classifier = MultiOutputClassifier(SVC())
classifier.fit(X_train, y_train)

# Evaluate the model (accuracy here is strict subset accuracy).
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred, target_names=mlb.classes_))

# Prediction example.
new_sentence = "New sentence text"
new_sentence_vec = vectorizer.transform([new_sentence])
predicted_labels = classifier.predict(new_sentence_vec)
predicted_entities = mlb.inverse_transform(predicted_labels)
print("Predicted entities:", predicted_entities)
Gradient Boosting with multi-label classification
from sklearn.ensemble import GradientBoostingClassifier

# Train a multi-label classifier (binary relevance: one GradientBoosting
# model per label column). Reuses X_train/y_train, vectorizer and mlb from
# the Logistic Regression example above.
classifier = MultiOutputClassifier(GradientBoostingClassifier())
classifier.fit(X_train, y_train)

# Evaluate the model (accuracy here is strict subset accuracy).
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred, target_names=mlb.classes_))

# Prediction example.
new_sentence = "New sentence text"
new_sentence_vec = vectorizer.transform([new_sentence])
predicted_labels = classifier.predict(new_sentence_vec)
predicted_entities = mlb.inverse_transform(predicted_labels)
print("Predicted entities:", predicted_entities)