Naposledy aktivní 1756454403

See the full blog post: https://travisshears.com/tech/blog/blocking-ai-scraping

travisshears's Avatar travisshears revidoval tento gist 1756454403. Přejít na revizi

Žádné změny

travisshears's Avatar travisshears revidoval tento gist 1756454377. Přejít na revizi

1 file changed, 0 insertions, 0 deletions

botPolicy.ymal přejmenováno na botPolicy.yaml

Soubor přejmenován beze změn

travisshears's Avatar travisshears revidoval tento gist 1756454351. Přejít na revizi

Žádné změny

travisshears's Avatar travisshears revidoval tento gist 1756450023. Přejít na revizi

3 files changed, 158 insertions

botPolicy.ymal(vytvořil soubor)

@@ -0,0 +1,84 @@
1 + bots:
2 + - name: php # Rule: refuse any request whose path ends in "php"
3 + path_regex: php$
4 + action: DENY
5 + - name: api-calls # Rule: let write methods and /api/ paths through
6 + expression:
7 + any: # matches when at least one of the expressions below is true
8 + - 'method == "POST"'
9 + - 'method == "PATCH"'
10 + - 'method == "DELETE"'
11 + - 'path.contains("/api/")'
12 + action: ALLOW
13 + # Defaults taken from https://github.com/TecharoHQ/anubis/blob/main/data/botPolicies.yaml
14 + # Pathological bots to deny
15 + - import: (data)/bots/_deny-pathological.yaml
16 + - import: (data)/bots/aggressive-brazilian-scrapers.yaml
17 + - import: (data)/meta/ai-block-aggressive.yaml
18 + # Search engine crawlers to allow, defaults to:
19 + - import: (data)/crawlers/_allow-good.yaml
20 + # Challenge Firefox AI previews
21 + - import: (data)/clients/x-firefox-ai.yaml
22 + # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
23 + - import: (data)/common/keep-internet-working.yaml
24 + # Generic catchall rule
25 + - name: generic-browser # applies to any User-Agent containing "Mozilla" or "Opera"
26 + user_agent_regex: >-
27 + Mozilla|Opera
28 + action: WEIGH # no verdict here; only adjusts the request weight used by the thresholds
29 + weight:
30 + adjust: 10 # presumably from a starting weight of 0, this lands in the "weight >= 10" threshold band -- confirm
31 +
32 + # The weight thresholds for when to trigger individual challenges. Any
33 + # CHALLENGE will take precedence over this.
34 + #
35 + # A threshold has four configuration options:
36 + #
37 + # - name: the name that is reported down the stack and used for metrics
38 + # - expression: A CEL expression with the request weight in the variable
39 + # weight
40 + # - action: the Anubis action to apply, similar to in a bot policy
41 + # - challenge: which challenge to send to the user, similar to in a bot policy
42 + #
43 + # See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
44 + # information.
45 + thresholds:
46 + # By default Anubis ships with the following thresholds:
47 + - name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
48 + expression: weight <= 0 # a feather weighs zero units
49 + action: ALLOW # Allow the traffic through
50 + # For clients that had some weight reduced through custom rules, give them a
51 + # lightweight challenge.
52 + - name: mild-suspicion
53 + expression:
54 + all: # both bounds must hold: 0 < weight < 10
55 + - weight > 0
56 + - weight < 10
57 + action: CHALLENGE
58 + challenge:
59 + # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
60 + algorithm: metarefresh
61 + difficulty: 1
62 + report_as: 1
63 + # For clients that are browser-like but have either gained points from custom rules or
64 + # report as a standard browser.
65 + - name: moderate-suspicion # the band a request reaches with a weight of exactly 10
66 + expression:
67 + all: # both bounds must hold: 10 <= weight < 20
68 + - weight >= 10
69 + - weight < 20
70 + action: CHALLENGE
71 + challenge:
72 + # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
73 + algorithm: fast
74 + difficulty: 2 # two leading zeros, very fast for most clients
75 + report_as: 2
76 + # For clients that are browser-like and have gained many points from custom rules
77 + - name: extreme-suspicion
78 + expression: weight >= 20 # open-ended top band; together the four bands cover every weight
79 + action: CHALLENGE
80 + challenge:
81 + # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
82 + algorithm: fast
83 + difficulty: 4 # higher than the moderate band's difficulty of 2
84 + report_as: 4

docker-compose.yml(vytvořil soubor)

@@ -0,0 +1,17 @@
1 + services:
2 + # Anubis service for personal site protection
3 + anubis-001-personal-site:
4 + image: ghcr.io/techarohq/anubis:latest # NOTE(review): floating tag; consider pinning a release
5 + restart: unless-stopped
6 + container_name: anubis
7 + network_mode: host # shares the host network, so the 127.0.0.1 addresses below are the host's loopback
8 + volumes:
9 + - "./botPolicy.yaml:/botPolicy.yaml:ro" # policy mounted read-only; container path matches POLICY_FNAME
10 +
11 + environment:
12 + TARGET: "http://127.0.0.1:5019" # same address the post-anubis nginx server listens on
13 + COOKIE_DOMAIN: "travisshears.com"
14 + SERVE_ROBOTS_TXT: "true"
15 + BIND: "127.0.0.1:5018" # same address as the server in the nginx "upstream anubis-001" block
16 + METRICS_BIND: "127.0.0.1:5020"
17 + POLICY_FNAME: "/botPolicy.yaml"

nginx-config(vytvořil soubor)

@@ -0,0 +1,57 @@
1 + server { # public-facing server: forwards read traffic to Anubis, everything else to the site
2 + server_name travisshears.com;
3 +
4 + access_log /var/log/nginx/travisshears.pre-anubis.com.access.log json_logs;
5 + error_log /var/log/nginx/travisshears.pre-anubis.com.error.log;
6 +
7 + proxy_set_header Origin $http_origin;
8 + proxy_set_header HOST $host;
9 + proxy_set_header X-Forwarded-Proto $scheme;
10 + proxy_set_header X-Real-IP $remote_addr; # client address passed downstream in X-Real-IP
11 + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
12 + proxy_set_header X-Http-Version $server_protocol;
13 + client_max_body_size 25M;
14 +
15 + location / {
16 + limit_except GET HEAD OPTIONS { # applies to every method other than GET, HEAD and OPTIONS
17 + proxy_pass personal-site-url; # placeholder: replace with the real site URL; these requests skip Anubis
18 + }
19 + proxy_pass http://anubis-001; # GET/HEAD/OPTIONS go to the "anubis-001" upstream
20 + }
21 +
22 + ...
23 + }
24 +
25 + server { # plain-HTTP (port 80) server: redirects the site's host to HTTPS
26 + if ($host = travisshears.com) {
27 + return 301 https://$host$request_uri; # permanent redirect, same host and URI
28 + }
29 +
30 +
31 + listen 80;
32 + listen [::]:80 ;
33 + server_name travisshears.com;
34 + return 404; # any request whose Host did not match the "if" above
35 + }
36 +
37 +
38 + server { # loopback-only server that receives requests after Anubis has let them through
39 + server_name travisshears.com;
40 + listen 127.0.0.1:5019; # must match TARGET in docker-compose.yml
41 +
42 + access_log /var/log/nginx/travisshears.post-anubis.com.access.log json_logs;
43 + error_log /var/log/nginx/travisshears.post-anubis.com.error.log;
44 +
45 + # Get the visiting IP from the TLS termination server
46 + set_real_ip_from 127.0.0.1; # this server listens on loopback TCP, so the trusted peer is 127.0.0.1, not "unix:"
47 + real_ip_header X-Real-IP;
48 +
49 + location / {
50 + proxy_pass personal-site-url; # placeholder: replace with the real site URL
51 + }
52 + }
53 +
54 + upstream anubis-001 { # named upstream referenced by "proxy_pass http://anubis-001;"
55 + server 127.0.0.1:5018; # same address as BIND in docker-compose.yml
56 + keepalive 30; # NOTE(review): upstream keepalive normally needs proxy_http_version 1.1 and an empty Connection header; neither is visible here -- confirm
57 + }
Novější Starší