Zuletzt aktiv 1756454403

See the full blog post: https://travisshears.com/tech/blog/blocking-ai-scraping

Änderung 83d334d64fb63503f4dbb414de463ebc29066b92

botPolicy.yaml Originalformat
# Anubis bot policy for travisshears.com: the request-matching rules under
# `bots`, followed by the weight `thresholds` that pick an action for requests
# that were only weighed.
bots:
  # Deny every request whose path ends in "php".
  - name: php
    path_regex: php$
    action: DENY
  # Allow POST/PATCH/DELETE requests and any path containing "/api/".
  - name: api-calls
    expression:
      any:
        - 'method == "POST"'
        - 'method == "PATCH"'
        - 'method == "DELETE"'
        - 'path.contains("/api/")'
    action: ALLOW
  # Defaults taken from https://github.com/TecharoHQ/anubis/blob/main/data/botPolicies.yaml
  # Pathological bots to deny
  - import: (data)/bots/_deny-pathological.yaml
  - import: (data)/bots/aggressive-brazilian-scrapers.yaml
  - import: (data)/meta/ai-block-aggressive.yaml
  # Search engine crawlers to allow (the list comes from the imported file)
  - import: (data)/crawlers/_allow-good.yaml
  # Challenge Firefox AI previews
  - import: (data)/clients/x-firefox-ai.yaml
  # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
  - import: (data)/common/keep-internet-working.yaml
  # Generic catchall rule: anything that identifies as Mozilla or Opera gets
  # 10 points of weight, which the thresholds below turn into a challenge.
  - name: generic-browser
    user_agent_regex: >-
      Mozilla|Opera
    action: WEIGH
    weight:
      adjust: 10

# The weight thresholds for when to trigger individual challenges. Any
# CHALLENGE will take precedence over this.
#
# A threshold has four configuration options:
#
# - name: the name that is reported down the stack and used for metrics
# - expression: A CEL expression with the request weight in the variable
#   weight
# - action: the Anubis action to apply, similar to in a bot policy
# - challenge: which challenge to send to the user, similar to in a bot policy
#
# See https://anubis.techaro.lol/docs/admin/configuration/thresholds for more
# information.
thresholds:
  # By default Anubis ships with the following thresholds:
  - name: minimal-suspicion # This client is likely fine, its soul is lighter than a feather
    expression: weight <= 0 # a feather weighs zero units
    action: ALLOW # Allow the traffic through
  # For clients that had some weight reduced through custom rules, give them a
  # lightweight challenge.
  - name: mild-suspicion
    expression:
      all:
        - weight > 0
        - weight < 10
    action: CHALLENGE
    challenge:
      # https://anubis.techaro.lol/docs/admin/configuration/challenges/metarefresh
      algorithm: metarefresh
      difficulty: 1
      report_as: 1
  # For clients that are browser-like but have either gained points from custom rules or
  # report as a standard browser.
  - name: moderate-suspicion
    expression:
      all:
        - weight >= 10
        - weight < 20
    action: CHALLENGE
    challenge:
      # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
      algorithm: fast
      difficulty: 2 # two leading zeros, very fast for most clients
      report_as: 2
  # For clients that are browser like and have gained many points from custom rules
  - name: extreme-suspicion
    expression: weight >= 20
    action: CHALLENGE
    challenge:
      # https://anubis.techaro.lol/docs/admin/configuration/challenges/proof-of-work
      algorithm: fast
      difficulty: 4
      report_as: 4
docker-compose.yml Originalformat
# Compose file running Anubis between the public nginx server and the
# internal nginx listener for travisshears.com.
services:
  # Anubis service for personal site protection
  anubis-001-personal-site:
    # NOTE(review): `latest` floats with every upstream release; consider
    # pinning a specific tag for reproducible deployments.
    image: ghcr.io/techarohq/anubis:latest
    restart: unless-stopped
    container_name: anubis
    # Host networking, so the 127.0.0.1 addresses below are the host's loopback
    # (the same one the nginx config listens on / proxies to).
    network_mode: host
    volumes:
      # Policy file mounted read-only; must match POLICY_FNAME below.
      - "./botPolicy.yaml:/botPolicy.yaml:ro"

    environment:
      # Backend that receives requests Anubis lets through (the nginx server
      # listening on 127.0.0.1:5019).
      TARGET: "http://127.0.0.1:5019"
      COOKIE_DOMAIN: "travisshears.com"
      SERVE_ROBOTS_TXT: "true"
      # Address Anubis listens on (the nginx `anubis-001` upstream points here).
      BIND: "127.0.0.1:5018"
      METRICS_BIND: "127.0.0.1:5020"
      POLICY_FNAME: "/botPolicy.yaml"
nginx-config Originalformat
# Public-facing server for travisshears.com ("pre-anubis"): GET/HEAD/OPTIONS
# traffic is proxied to Anubis, every other method goes straight to the site.
server {
    server_name travisshears.com;

    access_log /var/log/nginx/travisshears.pre-anubis.com.access.log json_logs;
    error_log /var/log/nginx/travisshears.pre-anubis.com.error.log;

    # Headers forwarded to whichever upstream handles the request.
    proxy_set_header Origin $http_origin;
    proxy_set_header HOST $host;
    proxy_set_header X-Forwarded-Proto $scheme;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Http-Version $server_protocol;
    client_max_body_size 25M;

    location / {
        # Directives inside limit_except apply to every method NOT listed,
        # so POST/PATCH/DELETE/... skip Anubis entirely.
        limit_except GET HEAD OPTIONS {
            # `personal-site-url` is a placeholder for the real backend URL.
            proxy_pass personal-site-url;
        }
        proxy_pass http://anubis-001;
    }

    # ... (remaining directives elided in the original; presumably the TLS
    # listen/certificate settings -- TODO confirm)
}

# Plain-HTTP server: redirect travisshears.com to HTTPS, 404 anything else.
server {
    if ($host = travisshears.com) {
        return 301 https://$host$request_uri;
    }

    listen 80;
    listen [::]:80;
    server_name travisshears.com;
    return 404;
}

# Internal server ("post-anubis"): receives the requests Anubis let through
# (Anubis TARGET is http://127.0.0.1:5019) and proxies them to the site.
server {
    server_name travisshears.com;
    listen 127.0.0.1:5019;

    access_log /var/log/nginx/travisshears.post-anubis.com.access.log json_logs;
    error_log /var/log/nginx/travisshears.post-anubis.com.error.log;

    # Get the visiting IP from the TLS termination server.
    # Anubis connects over TCP loopback, so 127.0.0.1 is the trusted peer;
    # `unix:` would only match connections arriving on a UNIX-domain socket.
    set_real_ip_from 127.0.0.1;
    real_ip_header X-Real-IP;

    location / {
        # `personal-site-url` is a placeholder for the real backend URL.
        proxy_pass personal-site-url;
    }
}

# Anubis instance (BIND 127.0.0.1:5018 in docker-compose.yml).
upstream anubis-001 {
    server 127.0.0.1:5018;
    # NOTE(review): per the nginx upstream docs, keepalive to an HTTP upstream
    # also needs `proxy_http_version 1.1;` and `proxy_set_header Connection "";`
    # in the proxying location -- confirm whether that is set elsewhere.
    keepalive 30;
}