2023-07-11 23:50:08 +02:00
|
|
|
import time
|
2023-09-16 05:11:16 +02:00
|
|
|
|
2023-07-11 23:50:08 +02:00
|
|
|
import numpy as np
|
2023-09-16 05:11:16 +02:00
|
|
|
from numpy.linalg import norm
|
2023-07-11 23:50:08 +02:00
|
|
|
|
2023-11-17 03:03:06 +01:00
|
|
|
from extensions.openai.embeddings import get_embeddings
|
|
|
|
|
2023-07-12 20:33:25 +02:00
|
|
|
# Master switch: when True, moderations() does no scoring and reports every
# category as unflagged with a score of 0.0.
moderations_disabled = False

# Cache of category name -> embedding; stays None until
# get_category_embeddings() fills it on first use.
category_embeddings = None
# NOTE(review): not referenced by any function visible in this part of the file.
antonym_embeddings = None

# Category labels that are embedded and compared against the input text.
categories = [
    "sexual",
    "hate",
    "harassment",
    "self-harm",
    "sexual/minors",
    "hate/threatening",
    "violence/graphic",
    "self-harm/intent",
    "self-harm/instructions",
    "harassment/threatening",
    "violence",
]

# A category is flagged when its score is strictly greater than this value.
flag_threshold = 0.5
|
|
|
|
|
|
|
|
|
2023-07-24 16:28:12 +02:00
|
|
|
def get_category_embeddings() -> dict:
    """Return the category-name -> embedding mapping, building it on first use.

    The first call embeds every entry of ``categories`` via ``get_embeddings``,
    converts the result with ``.tolist()`` and stores the mapping in the
    module-level ``category_embeddings`` cache; later calls return that cache.
    """
    global category_embeddings

    # Fast path: the cache has already been populated.
    if category_embeddings is not None:
        return category_embeddings

    vectors = get_embeddings(categories).tolist()
    # Pair each label with the vector at the same position.
    category_embeddings = {label: vector for label, vector in zip(categories, vectors)}
    return category_embeddings
|
|
|
|
|
|
|
|
|
2023-07-24 16:28:12 +02:00
|
|
|
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine similarity of vectors *a* and *b*.

    Computed as ``dot(a, b) / (norm(a) * norm(b))``.

    If either vector has zero norm the cosine is undefined; ``0.0`` is
    returned in that case instead of dividing by zero, which would produce
    NaN and a NumPy ``RuntimeWarning``.
    """
    denominator = norm(a) * norm(b)
    if denominator == 0:
        # At least one vector is all zeros: there is no direction to compare.
        return 0.0
    return np.dot(a, b) / denominator
|
|
|
|
|
|
|
|
|
|
|
|
# seems most openai like with all-mpnet-base-v2
def mod_score(a: np.ndarray, b: np.ndarray) -> float:
    """Return the moderation score of two embeddings: twice their dot product.

    The inputs are used as given; no normalisation is applied here.
    """
    raw_similarity = np.dot(a, b)
    return 2.0 * raw_similarity
|
|
|
|
|
|
|
|
|
|
|
|
def moderations(input):
    """Score text against the moderation categories, OpenAI-response style.

    Args:
        input: A single string, or a list of strings, to moderate.

    Returns:
        A dict with the keys ``"id"``, ``"model"`` and ``"results"``. Each
        entry of ``"results"`` holds ``"flagged"`` (bool), ``"categories"``
        (category -> bool) and ``"category_scores"`` (category -> float).

    When ``moderations_disabled`` is true nothing is embedded: every input
    gets an all-clear entry (all flags False, all scores 0.0).
    """
    response = {
        "id": f"modr-{int(time.time()*1e9)}",
        "model": "text-moderation-001",
        "results": [],
    }

    if moderations_disabled:
        # Emit one all-clear entry per input so the response has the same
        # shape as the scored path below. Anything that is not a list/tuple
        # (e.g. a bare string) counts as a single input.
        entry_count = len(input) if isinstance(input, (list, tuple)) else 1
        response['results'] = [
            {
                'categories': {name: False for name in categories},
                'category_scores': {name: 0.0 for name in categories},
                'flagged': False,
            }
            for _ in range(entry_count)
        ]
        return response

    # get_category_embeddings() maintains the module-level cache itself, so a
    # local reference is all that is needed here.
    embeddings_by_category = get_category_embeddings()

    # input, string or array
    texts = [input] if isinstance(input, str) else input

    for text in texts:
        # Each text is embedded on its own; one result is produced for every
        # embedding that get_embeddings yields for it.
        for text_embedding in get_embeddings([text]):
            # float() keeps NumPy scalar types out of the response payload.
            scores = {
                name: float(mod_score(embeddings_by_category[name], text_embedding))
                for name in categories
            }
            flags = {name: scores[name] > flag_threshold for name in categories}

            response['results'].append({
                'flagged': any(flags.values()),
                'categories': flags,
                'category_scores': scores,
            })

    # NOTE(review): debug output kept to preserve existing behaviour; consider
    # routing it through the project's logger instead of stdout.
    print(response)

    return response
|