blob: 0502eddd76a38d7cad0ffb32a727cad289545a4a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
{# Base domain of this site: used below for the `w.` server name, the certificate/key paths, and the `domains_hascert` lookup. #}
{% set domain = "233233.xyz" %}
#
# nginx/sites: reverse proxy to the English Wikipedia:
# * w.{{ domain }} -> en.wikipedia.org
# * w.{{ domain }}/m/ -> en.m.wikipedia.org
#
#
# Aaron LI
# 2019-09-22
#
{% if domains_hascert[domain] %}
server {
# HTTPS endpoint (HTTP/2 enabled) on both IPv4 and IPv6.
listen 443 ssl http2;
listen [::]:443 ssl http2;
server_name w.{{ domain }};

# TLS certificate chain and private key (issued by Let's Encrypt).
ssl_certificate {{ web.ssl_root }}/{{ domain }}/fullchain;
ssl_certificate_key {{ web.ssl_root }}/{{ domain }}/key;

# Response caching (currently disabled).
#proxy_cache CACHE;

# Rewrite the domain of upstream cookies to the requested host.
proxy_cookie_domain wikipedia.org $host;

# Strip these upstream response headers to avoid duplicated/overriding
# headers in the proxied response.
proxy_hide_header Content-Security-Policy;
proxy_hide_header Referrer-Policy;
proxy_hide_header Strict-Transport-Security;
proxy_hide_header X-Content-Type-Options;
proxy_hide_header X-Frame-Options;
proxy_hide_header X-XSS-Protection;

# Rewrite upstream links in response bodies so that they point at this proxy.
# NOTE: This requires *uncompressed* data from upstream, i.e., the proxied
#       request must carry an empty "Accept-Encoding" header; otherwise
#       the substitution does not work!
sub_filter_once off;
sub_filter_types text/css text/javascript application/json;
{% block sub_filter_common %}
sub_filter //en.wikipedia.org/ //$host/;
sub_filter //en.m.wikipedia.org/ //$host/m/;
sub_filter //meta.wikimedia.org/ //$host/__wikimedia/meta/;
sub_filter //upload.wikimedia.org/ //$host/__wikimedia/upload/;
{% endblock %}
# Desktop site: everything not matched elsewhere goes to en.wikipedia.org.
location / {
    proxy_pass https://en.wikipedia.org;

    # Map upstream redirects back onto this proxy; redirects to the
    # mobile version are sent to the `/m/` location.
    proxy_redirect default;
    proxy_redirect https://en.m.wikipedia.org/ /m/;

    # NOTE: `Host` must name the upstream site. Passing `$host` instead
    #       makes Wikipedia answer with the error "Domain not configured".
    proxy_set_header Host en.wikipedia.org;
    proxy_set_header Referer https://en.wikipedia.org;

    {% block proxy_set_header_common %}
    # Request headers shared by all proxied locations.
    proxy_set_header User-Agent $http_user_agent;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
    # Send no cookies upstream, and send an empty Accept-Encoding so that
    # upstream replies uncompressed (needed by the `sub_filter` rules).
    proxy_set_header Cookie "";
    proxy_set_header Accept-Encoding "";
    proxy_set_header Accept-Language $http_accept_language;
    {% endblock %}
}
# Mobile site: requests under /m/ are proxied to en.m.wikipedia.org.
location ^~ /m/ {
    # NOTE: This `proxy_pass` directive is specified WITH a URI
    #       (i.e., the trailing `/` here), so when a request is
    #       passed to the server, the part of the *normalized*
    #       request URI matching the location is replaced by the
    #       URI specified in the directive.
    #       Reference: http://nginx.org/r/proxy_pass
    proxy_pass https://en.m.wikipedia.org/;

    # Map upstream redirects back onto this proxy; redirects to the
    # desktop version are sent to `/`.
    proxy_redirect default;
    proxy_redirect https://en.wikipedia.org/ /;

    proxy_set_header Host en.m.wikipedia.org;
    proxy_set_header Referer https://en.m.wikipedia.org;
    # NOTE: The upper level "proxy_set_header" directives are *not*
    #       inherited since there are such directives on this level!
    {{ self.proxy_set_header_common() }}

    # All "sub_filter" directives from the upper level must be repeated
    # here, because they are not inherited once this level defines its own.
    {{ self.sub_filter_common() }}
    # Keep root-relative article links inside the mobile proxy.
    # The pattern is anchored on the opening double quote so that only
    # links starting with `/wiki/` are rewritten; absolute links to other
    # hosts (e.g., `//de.m.wikipedia.org/wiki/...`) are left intact rather
    # than being turned into a non-existent `/m/wiki/` path on that host.
    sub_filter '"/wiki/' '"/m/wiki/';
}
location ^~ /__wikimedia/meta/ {
    # The `^~` modifier makes the location search stop here once this
    # prefix matches.
    # The trailing `/` of the `proxy_pass` URL tells Nginx to strip the
    # matched location prefix from the URI passed upstream.
    # Credit: https://serverfault.com/a/725433/387898
    proxy_pass https://meta.wikimedia.org/;

    proxy_set_header Referer https://meta.wikimedia.org;
    proxy_set_header Host meta.wikimedia.org;
    {{ self.proxy_set_header_common() }}
}
location ^~ /__wikimedia/upload/ {
    # Proxy to upload.wikimedia.org; the trailing `/` of the `proxy_pass`
    # URL strips the matched location prefix from the upstream URI.
    proxy_pass https://upload.wikimedia.org/;

    proxy_set_header Referer https://upload.wikimedia.org;
    proxy_set_header Host upload.wikimedia.org;
    {{ self.proxy_set_header_common() }}
}
# Reject known crawlers, matched case-insensitively on the User-Agent.
# NOTE: This server-level check is evaluated before a location is
#       selected, so matching clients get 403 for every URI.
if ($http_user_agent ~* "qihoobot|Baiduspider|Googlebot|Googlebot-Mobile|Googlebot-Image|Mediapartners-Google|Adsbot-Google|Feedfetcher-Google|Yahoo! Slurp|Yahoo! Slurp China|YoudaoBot|Sosospider|Sogou spider|Sogou web spider|MSNBot|ia_archiver|Tomato Bot") {
    return 403;
}
# Serve a static robots.txt that disallows all crawling.
# The exact-match modifier (`=`) restricts this to `/robots.txt` itself;
# a plain prefix location would also answer any URI that merely starts
# with `/robots.txt` instead of proxying it.
location = /robots.txt {
    default_type text/plain;
    return 200 "User-agent: *\nDisallow: /\n";
}
}
{% endif %}
|