From cd68ae31263e8e44721491a4e8af9232aa6b283b Mon Sep 17 00:00:00 2001 From: Alec Muffett Date: Sun, 12 Aug 2018 16:47:12 +0100 Subject: [PATCH] commit: remove classic/soft mode --- demo.d/example.tconf | 18 ++-- demo.d/wikipedia.tconf | 3 - lib.d/do-configure.pl | 2 +- lib.d/lint.pl | 1 - templates.d/nginx-hard.conf.txt | 14 ---- templates.d/nginx.conf.txt | 141 +------------------------------- 6 files changed, 11 insertions(+), 168 deletions(-) delete mode 100755 templates.d/nginx-hard.conf.txt diff --git a/demo.d/example.tconf b/demo.d/example.tconf index 793f677..e90baf1 100644 --- a/demo.d/example.tconf +++ b/demo.d/example.tconf @@ -39,18 +39,16 @@ # # set hardcoded_endpoint_csv /path,stringvalue ... -# ---- HARD-MODE ---- +# ---- PRESERVE_CSV ---- -# hard-mode is an experimental but very powerful brute-force -# search-and-replace strategy for editing content on the fly; it -# changes the way the nginx.conf works to rewrite HTML/other -# content. One side-effect of this is that some instances of domain -# names may be rewritten unwantedly (eg: foo@facebook.com -> -# foo@facebookcorewwwi.onion) which is reason for `preserve_csv` which -# uses a very simple heuristic to try and protect plaintext domain -# names from being rewritten. +# EOTK uses a search-and-replace strategy for editing content on the +# fly; one side-effect of this is that some instances of domain names +# may be rewritten undesirably (eg: email addresses like +# foo@facebook.com become foo@facebookcorewwwi.onion). + +# `preserve_csv` uses a very simple heuristic to try and protect +# plaintext domain names from being rewritten. -# set hard_mode 1 # set preserve_csv uniquetoken,regexp,regexpcaseflag,replacement ... # eg: `set preserve_csv fbtld,facebook\\.com,i,facebook.com` ... 
will diff --git a/demo.d/wikipedia.tconf b/demo.d/wikipedia.tconf index 4b01064..365b246 100644 --- a/demo.d/wikipedia.tconf +++ b/demo.d/wikipedia.tconf @@ -1,9 +1,6 @@ # -*- conf -*- # eotk (c) 2017 Alec Muffett -# use brute-force "search and replace" strategy -set hard_mode 1 - # CSVs of canonical domains (eg: email) to preserve (todo: more here?) # nb: you must explicitly list all domains that are of preservation; # "foo.com" & "www.foo.com" are treated as separate, for this purpose diff --git a/lib.d/do-configure.pl b/lib.d/do-configure.pl index dbda8a2..d9d5500 100755 --- a/lib.d/do-configure.pl +++ b/lib.d/do-configure.pl @@ -433,7 +433,7 @@ sub DoProject { # default-set values &SetEnv("block_err", "This action is not supported over Onion yet, sorry."); &SetEnv("force_https", 1); -&SetEnv("hard_mode", 0); +&SetEnv("hard_mode", 1); &SetEnv("nginx_action_abort", "return 500"); &SetEnv("nginx_block_busy_size", "16k"); &SetEnv("nginx_block_count", 8); diff --git a/lib.d/lint.pl b/lib.d/lint.pl index a0f00ec..e5b0a42 100755 --- a/lib.d/lint.pl +++ b/lib.d/lint.pl @@ -40,7 +40,6 @@ my %known = 'FORCE_HTTPS' => 1, 'FOREIGNMAP_CSV' => 1, 'HARDCODED_ENDPOINT_CSV' => 1, - 'HARD_MODE' => 1, 'HOST_BLACKLIST' => 1, 'HOST_BLACKLIST_RE' => 1, 'HOST_WHITELIST' => 1, diff --git a/templates.d/nginx-hard.conf.txt b/templates.d/nginx-hard.conf.txt deleted file mode 100755 index 167b222..0000000 --- a/templates.d/nginx-hard.conf.txt +++ /dev/null @@ -1,14 +0,0 @@ -# -*- awk -*- -# eotk (c) 2017 Alec Muffett - -"nginx-hard.conf" has been deprecated & merged into the "classic" config, -in order to simplify future development. If you are seeing this message, -please remove the following line: - - set nginx_template templates.d/nginx-hard.conf.txt - -...from your configuration, and replace it with: - - set hard_mode 1 - -Apologies for the inconvenience. 
diff --git a/templates.d/nginx.conf.txt b/templates.d/nginx.conf.txt index 9fb12cb..a452d92 100755 --- a/templates.d/nginx.conf.txt +++ b/templates.d/nginx.conf.txt @@ -3,16 +3,6 @@ # EMACS awk mode works quite well for nginx configs -# ---- BEGIN HARD/CLASSIC SWITCH ---- -%%IF %HARD_MODE% -# *HARD* configuration -# swap domain names for onions via brute-force, with whitelisted repairs... -%%ELSE -# *CLASSIC* configuration -# swap domain names for onions via targeted regular expressions... -%%ENDIF -# ---- END HARD/CLASSIC SWITCH ---- - # logs and pids pid %PROJECT_DIR%/nginx.pid; error_log %LOG_DIR%/nginx-error.log %NGINX_SYSLOG%; @@ -123,9 +113,6 @@ http { %%ENDIF ; - #================================================================== - %%IF %HARD_MODE% - # ---- BEGIN HARD MODE CODE ---- %%IF %PRESERVE_CSV% # preserve subs (save-phase): 1=description,2=re,3=i_or_empty,4=replacement %%CSV %PRESERVE_CSV% @@ -141,14 +128,14 @@ http { %%ENDIF %%BEGIN - # HARD-MODE: %DNS_DOMAIN% -> %ONION_ADDRESS% + # map: %DNS_DOMAIN% -> %ONION_ADDRESS% subs_filter \\b%DNS_DOMAIN_RE2%\\b %ONION_ADDRESS% gir ; %%IF %HARD_MODE% > 1 - # HARD-MODE-EXTRA: %DNS_DOMAIN_RE% -> %ONION_ADDRESS_RE% + # extra map: %DNS_DOMAIN_RE% -> %ONION_ADDRESS_RE% subs_filter \\b%DNS_DOMAIN_RERE2%\\b %ONION_ADDRESS_RE2% @@ -184,77 +171,6 @@ http { %%ELSE # no preserve subs (restore-phase) %%ENDIF - # ---- END HARD MODE CODE ---- - #------------------------------------------------------------------ - %%ELSE - #------------------------------------------------------------------ - # ---- BEGIN CLASSIC MODE CODE ---- - - # subs_filter: these patterns bear some explanation; the goal is to - # work regular expressions really hard in order to minimise the - # number of expressions which are used in the basic config, so the - # basic pattern is to capture zero/more "sub." in "//sub.foo.com" - # and interpolate that into "//sub.xxxxxxxx.onion"; so far? 
- - # but it turns out that some JSON libraries like to "escape" the - # forward slashes in JSON content, leading to input like (literal) - # "http:\/\/sub.foo.com\/foo.html" - so you need to add the - # backslashes, but then you need to escape the backslashes, except - # they need double-escaping in the regexp because of string - # interpolation; hence 4x backslash -> 1x matched character - - # likewise we use the "_RE2" form of the re-escaped domain name in - # order to coerce the regexp to match literal dots, not wildcards. - - # there seems to be some sort of shortcut at play here; the trailing - # "\\b" also seems to work as "\b" however that would apparently - # break the double-escaping that is necessary/works everywhere else - # in subs_filter. - - # also, regrettably, named capture groups appear not to work, we're - # fortunate that there appear not to be more than 9 capture groups - # by default, lest "$1" bleed into the subsequent digits of an onion - # address: $1234567abcdefghij.onion - - # finally: some sites encode // with %-encoded "2F" in URIs... 
- - %%BEGIN - # for %DNS_DOMAIN% -> %ONION_ADDRESS% anchored by // or \/\/ - subs_filter - (/|\\\\/\\\\)/(([-0-9a-z]+\\.)+)?%DNS_DOMAIN_RE2%\\b - $1/$2%ONION_ADDRESS% - gir - ; - # for %DNS_DOMAIN% -> %ONION_ADDRESS% anchored with hex-encoded slashes - subs_filter - %%2F%%2F(([-0-9a-z]+\\.)+)?%DNS_DOMAIN_RE2%\\b - %%2F%%2F$1%ONION_ADDRESS% - gir - ; - %%END - - %%IF %FOREIGNMAP_CSV% - # foreignmap subs: 1=onion,2=re,3=re2,4=dns,5=re,6=re2 - %%CSV %FOREIGNMAP_CSV% - # for %4% -> %1% anchored by // or \/\/ - subs_filter - (/|\\\\/\\\\)/(([-0-9a-z]+\\.)+)?%6%\\b - $1/$2%1% - gir - ; - # for %4% -> %1% anchored with hex-encoded slashes - subs_filter - %%2F%%2F(([-0-9a-z]+\\.)+)?%6%\\b - %%2F%%2F$1%1% - gir - ; - %%ENDCSV - %%ELSE - # no foreignmap subs - %%ENDIF - # ---- END CLASSIC MODE CODE ---- - %%ENDIF - #================================================================== # o_to_d_lookup -> if cannot remap, return input. note: old versions # of lua-plugin cannot cope with code like o_to_d_mappings[o[1]] @@ -350,39 +266,6 @@ http { # filter the response headers en-route back to the user header_filter_by_lua_block { local k, v - -- ================================================================== - %%IF ! %HARD_MODE% - -- ---- BEGIN CLASSIC MODE CODE ---- - -- is this javascript/json? 
if so, extra processing: - -- 1) set a processing flag to pick up in body_filter_by_lua_block - -- 2) invalidate content-length, because we will change it - k = "Content-Type" - v = ngx.header[k] - if v == "application/javascript" or - v == "application/json" or - v == "application/x-javascript" or - v == "text/css" or - v == "text/javascript" then - ngx.ctx.needs_extra_processing = 1 - ngx.header.content_length = nil - end - %%IF %EXTRA_PROCESSING_CSV% - -- run on `v` for further extra_processing_csv checks - %%CSV %EXTRA_PROCESSING_CSV% - if v == "%1%" then - local m, err = ngx.re.match(ngx.var.uri, "%2%", "io") - if m then - ngx.ctx.needs_extra_processing = 1 - ngx.header.content_length = nil - end - end - %%ENDCSV - %%ELSE - -- no extra_processing_csv checks - %%ENDIF - -- ---- END CLASSIC MODE CODE ---- - %%ENDIF - -- ================================================================== local origin_rewrites = { "Access-Control-Allow-Origin", @@ -408,26 +291,6 @@ http { # filter the response body en-route back to the user body_filter_by_lua_block { - -- ================================================================== - %%IF ! %HARD_MODE% - -- ---- BEGIN CLASSIC MODE CODE ---- - -- rather than blindly replacing "foo.com" with "foo.onion" everywhere, - -- instead we restrict such brute-force replacement to content that was - -- flagged in header_filter_by_lua_block - if ngx.ctx.needs_extra_processing == 1 then - -- the flag was set; this content deserves brute-force search & replace - local chunk = ngx.arg[1] - -- subs_filter picked up the "//"-anchored strings; now we sub the rest - chunk = dns_to_onion(chunk) - -- and we sub the basic "foo\.com" regular-expressions, too - chunk = dnsre_to_onionre(chunk) - -- more complex regular expressions are out of scope. - ngx.arg[1] = chunk - end - -- ---- END CLASSIC MODE CODE ---- - %%ENDIF - -- ================================================================== - %%IF %DEBUG_TRAP% -- debug traps local i = ngx.arg[1]