kopia lustrzana https://github.com/thinkst/zippy
Added OpenAI connector and pytest harness
Signed-off-by: Jacob Torrey <jacob@thinkst.com>pull/6/head
rodzic
d1f5562602
commit
a9be80e94b
|
@ -0,0 +1,46 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import os, requests
|
||||
from typing import Optional, Dict, Tuple
|
||||
|
||||
# OpenAI's AI-text classifier model (the one behind their detector endpoint).
MODEL_NAME = 'model-detect-v2'
# Read from the environment; will be None when unset — presumably the caller
# is expected to export OPENAI_API_KEY before running. TODO confirm.
API_KEY = os.getenv('OPENAI_API_KEY')
# Plain completions endpoint; the detector is invoked as a 1-token completion.
API_URL = 'https://api.openai.com/v1/completions'
|
||||
|
||||
def make_req(text : str) -> Optional[Dict]:
    """Ask the OpenAI detector model to classify ``text``.

    Appends the ``<disc_score|>`` suffix the detector prompt expects, POSTs a
    1-token completion request, and returns the first ``choices`` entry from
    the JSON response (or None when the input is too short, the API key is
    missing, or the request fails).
    """
    # The OpenAI detector refuses inputs shorter than 1000 characters.
    if len(text) < 1000:
        print("Input too short for OpenAI to classify")
        return None
    # Bug fix: previously an unset OPENAI_API_KEY crashed with a TypeError
    # ('Bearer ' + None); fail soft instead, like the other error paths.
    if API_KEY is None:
        print("OPENAI_API_KEY is not set; unable to query OpenAI")
        return None
    headers = {
        'authorization': 'Bearer ' + API_KEY,
        'origin': 'https://platform.openai.com',
        'openai-organization': 'org-gxAZne8U4jJ8pb632XJBLH1i'
    }
    data = {
        'prompt': text + '<disc_score|>',
        'max_tokens': 1,
        'temperature': 1,
        'top_p': 1,
        'n': 1,
        'model': MODEL_NAME,
        'stream': False,
        'stop': '\\n',
        'logprobs': 5
    }
    try:
        # timeout bounds the call so a hung connection can't block forever.
        res = requests.post(API_URL, headers=headers, json=data, timeout=30)
        return res.json().get('choices', [None])[0]
    except (requests.RequestException, ValueError):
        # Network failure or a non-JSON body: report as unclassifiable
        # rather than propagating an exception to the caller.
        print("Request to OpenAI failed")
        return None
|
||||
|
||||
def run_on_file(fn : str) -> Optional[Tuple[str, float]]:
    """Classify the contents of file ``fn`` via the OpenAI detector.

    Returns a ``(label, confidence)`` tuple where label is 'AI' or 'Human'
    and confidence is the absolute log-probability of the detector's first
    token, or None when no verdict could be produced.
    """
    with open(fn, 'r') as fp:
        contents = fp.read()
    res = make_req(contents)
    if res is None:
        print("Unable to classify!")
        return None
    # The detector answers with a single token: '"' marks AI text, '!' human.
    verdicts = {'"': 'AI', '!': 'Human'}
    label = verdicts.get(res.get('text'))
    if label is None:
        return None
    # Confidence is the magnitude of the first token's log-probability.
    confidence = abs(res.get('logprobs').get('token_logprobs')[0])
    return (label, confidence)
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import pytest, os
|
||||
from warnings import warn
|
||||
from openai_detect import run_on_file
|
||||
|
||||
AI_SAMPLE_DIR = 'samples/llm-generated/'
HUMAN_SAMPLE_DIR = 'samples/human-generated/'

def _eligible_files(dirname):
    """List the files in *dirname* meeting the 1000-byte minimum the OpenAI
    detector requires (shorter inputs are rejected by the API)."""
    return [f for f in os.listdir(dirname) if os.path.getsize(dirname + f) >= 1000]

# Materialized as lists rather than single-use filter iterators, so the
# collections survive being iterated more than once.
ai_files = _eligible_files(AI_SAMPLE_DIR)
human_files = _eligible_files(HUMAN_SAMPLE_DIR)
|
||||
|
||||
def test_training_file():
    """Sanity check: the detector's own training corpus must read as AI."""
    verdict = run_on_file('ai-generated.txt')
    assert verdict[0] == 'AI', 'The training corpus should always be detected as AI-generated... since it is'
|
||||
|
||||
@pytest.mark.parametrize('f', human_files)
def test_human_samples(f):
    """Every sufficiently long human-written sample should be labelled 'Human'."""
    result = run_on_file(HUMAN_SAMPLE_DIR + f)
    classification, score = result[0], result[1]
    assert classification == 'Human', f + ' is a human-generated file, misclassified as AI-generated with confidence ' + str(round(score, 8))
|
||||
|
||||
@pytest.mark.parametrize('f', ai_files)
def test_llm_sample(f):
    """Every sufficiently long LLM-written sample should be labelled 'AI'."""
    result = run_on_file(AI_SAMPLE_DIR + f)
    classification, score = result[0], result[1]
    assert classification == 'AI', f + ' is an LLM-generated file, misclassified as human-generated with confidence ' + str(round(score, 8))
|
Ładowanie…
Reference in New Issue