From 9954e2411310bc7622f5fe4dcd199198b5ed4e09 Mon Sep 17 00:00:00 2001 From: Jacob Torrey Date: Fri, 27 Oct 2023 09:54:13 -0600 Subject: [PATCH] Add Windows support and a requirements.txt --- requirements.txt | 4 ++++ zippy.py | 16 ++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..11ab28f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +# Needed for ensembling +numpy +# Needed for brotli compression +brotli \ No newline at end of file diff --git a/zippy.py b/zippy.py index 180fb74..5f17c32 100755 --- a/zippy.py +++ b/zippy.py @@ -40,7 +40,7 @@ def clean_text(s : str) -> str: # The prelude file is a text file containing only AI-generated text, it is used to 'seed' the LZMA dictionary PRELUDE_FILE : str = 'ai-generated.txt' -with open(PRELUDE_FILE, 'r') as fp: +with open(PRELUDE_FILE, 'r', encoding='utf-8') as fp: PRELUDE_STR = clean_text(fp.read()) class AIDetector(ABC): @@ -62,7 +62,7 @@ class BrotliLlmDetector(AIDetector): self.prelude_ratio = prelude_ratio if prelude_file != None: - with open(prelude_file) as fp: + with open(prelude_file, encoding='utf-8') as fp: self.prelude_str = clean_text(fp.read()) self.prelude_ratio = self._compress(self.prelude_str) return @@ -102,7 +102,7 @@ class ZlibLlmDetector(AIDetector): self.prelude_ratio = prelude_ratio if prelude_file != None: - with open(prelude_file) as fp: + with open(prelude_file, encoding='utf-8') as fp: self.prelude_str = clean_text(fp.read()) lines = self.prelude_str.split('\n') self.prelude_chunks = array_split(lines, ceil(len(self.prelude_str) / 2**abs(self.WBITS))) @@ -153,7 +153,7 @@ class LzmaLlmDetector(AIDetector): if prelude_file != None: # Read it once to get the default compression ratio for the prelude - with open(prelude_file, 'r') as fp: + with open(prelude_file, 'r', encoding='utf-8') as fp: self.prelude_str = fp.read() self.prelude_ratio = self._compress(self.prelude_str) return @@ -212,7 +212,7 @@ class Zippy: def run_on_file(self, filename : str) -> Optional[Score]: '''Given a filename (and an optional number of decimal places to round to) returns the score for the contents of that file''' - with open(filename, 'r') as fp: + with open(filename, 'r', encoding='utf-8') as fp: txt = fp.read() #print('Calculating score for input of length ' + str(len(txt))) return self.detector.score_text(txt) @@ -230,7 +230,7 @@ class Zippy: This function chunks the file into at most chunk_size parts to score separately, then returns an average. This prevents a very large input being skewed because its compression ratio starts to overwhelm the prelude file. ''' - with open(filename, 'r') as fp: + with open(filename, 'r', encoding='utf-8') as fp: contents = fp.read() return self.run_on_text_chunked(contents, chunk_size, prelude_ratio=prelude_ratio) @@ -296,7 +296,7 @@ class EnsembledZippy: def run_on_file(self, filename : str) -> Optional[Score]: '''Given a filename (and an optional number of decimal places to round to) returns the score for the contents of that file''' - with open(filename, 'r') as fp: + with open(filename, 'r', encoding='utf-8') as fp: txt = fp.read() scores = [] for c in self.component_classifiers: @@ -315,7 +315,7 @@ class EnsembledZippy: This function chunks the file into at most chunk_size parts to score separately, then returns an average. This prevents a very large input being skewed because its compression ratio starts to overwhelm the prelude file. ''' - with open(filename, 'r') as fp: + with open(filename, 'r', encoding='utf-8') as fp: contents = fp.read() return self.run_on_text_chunked(contents, chunk_size)