Added the Originality.AI benchmark to samples and test_zippy_detect.py

Signed-off-by: Jacob Torrey <jacob@thinkst.com>
main
Jacob Torrey 2025-02-27 13:16:35 -07:00
rodzic f439ebbb67
commit 0ecd771a4d
4 zmienionych plików z 49913 dodań i 838 usunięć

Plik binarny nie jest wyświetlany.

Przed

Szerokość:  |  Wysokość:  |  Rozmiar: 82 KiB

Po

Szerokość:  |  Wysokość:  |  Rozmiar: 82 KiB

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Wyświetl plik

@ -1,6 +1,6 @@
#!/usr/bin/env python3
import pytest, os, jsonlines, csv
import pytest, os, jsonlines, csv, random
from warnings import warn
from zippy.zippy import Zippy, EnsembledZippy, PRELUDE_STR, LzmaLlmDetector, BrotliLlmDetector, ZlibLlmDetector, CompressionEngine
import zippy.zippy
@ -14,6 +14,8 @@ NUM_JSONL_SAMPLES = 500
ai_files = os.listdir(AI_SAMPLE_DIR)
human_files = os.listdir(HUMAN_SAMPLE_DIR)
random.seed('ZIPPY TESTING SEED') # Allow for randomizing the test sets, but with a fixed seen for repeatability
CONFIDENCE_THRESHOLD : float = 0.00 # What confidence to treat as error vs warning
# Bool on whether to ensemble the models or run a single model
@ -204,3 +206,24 @@ def test_gptzero_eval_dataset_ai(i, record_property):
(classification, score) = zippy.run_on_text_chunked(i.get('Document', ''), prelude_ratio=PRELUDE_RATIO)
record_property("score", str(score))
assert classification == i.get('Label'), GPTZERO_EVAL_FILE + ':' + str(i.get('Index')) + ' was misclassified with confidence ' + str(round(score, 8))
ORIGINALITY_EVAL_FILE = 'samples/originality-2000_samples_benchmark.csv'
originality_samples = []
with open(ORIGINALITY_EVAL_FILE) as fp:
csvr = csv.DictReader(fp)
for obj in csvr:
if len(obj.get('text', '')) >= MIN_LEN:
originality_samples.append(obj)
@pytest.mark.parametrize('i', random.sample(list(filter(lambda x: x.get('label') == 'human-written', originality_samples)), NUM_JSONL_SAMPLES))
def test_originality_eval_dataset_human(i, record_property):
(classification, score) = zippy.run_on_text_chunked(i.get('text', ''), prelude_ratio=PRELUDE_RATIO)
record_property("score", str(score))
assert classification == 'Human', ORIGINALITY_EVAL_FILE + ':' + str(i.get('dataset')) + ' was misclassified with confidence ' + str(round(score, 8))
@pytest.mark.parametrize('i', random.sample(list(filter(lambda x: x.get('label') == 'ai-generated' and x.get('dataset') != 'paraphrase', originality_samples)), NUM_JSONL_SAMPLES))
def test_originality_eval_dataset_ai(i, record_property):
(classification, score) = zippy.run_on_text_chunked(i.get('text', ''), prelude_ratio=PRELUDE_RATIO)
record_property("score", str(score))
assert classification == 'AI', ORIGINALITY_EVAL_FILE + ':' + str(i.get('dataset')) + ' was misclassified with confidence ' + str(round(score, 8))