aboutsummaryrefslogtreecommitdiffstats
path: root/roles/web/templates/sites/233233.w.conf.j2
blob: 0502eddd76a38d7cad0ffb32a727cad289545a4a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
{% set domain = "233233.xyz" %}
#
# nginx/sites: reverse proxy to the English Wikipedia:
# * w.{{ domain }}    -> en.wikipedia.org
# * w.{{ domain }}/m/ -> en.m.wikipedia.org
#
#
# Aaron LI
# 2019-09-22
#

{% if domains_hascert[domain] %}
server {
    # Host name answered by this server block.
    server_name  w.{{ domain }};

    # HTTPS with HTTP/2, on both IPv4 and IPv6.
    listen  443 ssl http2;
    listen  [::]:443 ssl http2;

    # SSL/TLS private key and certificate chain (kindly provided by
    # Let's Encrypt), read from the per-domain directory below the
    # configured SSL root.
    ssl_certificate_key  {{ web.ssl_root }}/{{ domain }}/key;
    ssl_certificate      {{ web.ssl_root }}/{{ domain }}/fullchain;

    # Enable caching
    #proxy_cache  CACHE;

    # Every upstream in this server is reached over HTTPS.  Nginx does
    # *not* send the upstream host name via TLS SNI unless told to
    # ("proxy_ssl_server_name" defaults to "off"), so enable it here; the
    # name sent is the host part of each "proxy_pass" URL.  This setting
    # is inherited by all the locations below.
    proxy_ssl_server_name  on;

    # Replace cookie domain
    proxy_cookie_domain  wikipedia.org  $host;

    # Hide some upstream headers to avoid duplicates/overriding
    proxy_hide_header  Strict-Transport-Security;
    proxy_hide_header  Content-Security-Policy;
    proxy_hide_header  X-Frame-Options;
    proxy_hide_header  X-XSS-Protection;
    proxy_hide_header  X-Content-Type-Options;
    proxy_hide_header  Referrer-Policy;

    # Rewrite the upstream links embedded in response bodies so that
    # they point back at this proxy; every occurrence is replaced, not
    # only the first one.
    # NOTE: The upstream must be asked for *uncompressed* data (i.e., the
    #       proxied request carries Accept-Encoding=""), otherwise the
    #       substitution won't work!
    sub_filter_once   off;
    sub_filter_types  text/css text/javascript application/json;
    {% block sub_filter_common %}
    sub_filter  //en.wikipedia.org/      //$host/;
    sub_filter  //en.m.wikipedia.org/    //$host/m/;
    sub_filter  //meta.wikimedia.org/    //$host/__wikimedia/meta/;
    sub_filter  //upload.wikimedia.org/  //$host/__wikimedia/upload/;
    {% endblock %}

    # Desktop site: reverse proxy to en.wikipedia.org
    location / {
        proxy_pass  https://en.wikipedia.org;

        # NOTE: `Host` must name the upstream and cannot be `$host`;
        #       otherwise, Wikipedia answers with the error
        #       "Domain not configured".
        proxy_set_header  Host     en.wikipedia.org;
        proxy_set_header  Referer  https://en.wikipedia.org;
        {% block proxy_set_header_common %}
        proxy_set_header  X-Real-IP          $remote_addr;
        proxy_set_header  X-Forwarded-For    $proxy_add_x_forwarded_for;
        proxy_set_header  X-Forwarded-Proto  $scheme;
        proxy_set_header  User-Agent         $http_user_agent;
        proxy_set_header  Accept-Language    $http_accept_language;
        proxy_set_header  Accept-Encoding    "";
        proxy_set_header  Cookie             "";
        {% endblock %}

        # Rewrite upstream redirections; the ones that lead to the mobile
        # version are sent to the `/m/` location of this proxy.
        proxy_redirect  default;
        proxy_redirect  https://en.m.wikipedia.org/  /m/;
    }

    # Mobile site: reverse proxy to en.m.wikipedia.org
    location ^~ /m/ {
        # NOTE: Because this `proxy_pass` carries a URI (the trailing
        #       `/`), the part of the *normalized* request URI that
        #       matched the location is replaced by that URI when the
        #       request is passed to the server.
        # Reference: http://nginx.org/r/proxy_pass
        proxy_pass  https://en.m.wikipedia.org/;

        # NOTE: "proxy_set_header" directives of the upper level are
        #       *not* inherited as soon as one is given on this level,
        #       hence the shared set is rendered here again.
        {{ self.proxy_set_header_common() }}
        proxy_set_header  Host     en.m.wikipedia.org;
        proxy_set_header  Referer  https://en.m.wikipedia.org;

        # Rewrite upstream redirections; the ones that lead to the
        # desktop version are sent to `/` of this proxy.
        proxy_redirect  default;
        proxy_redirect  https://en.wikipedia.org/  /;

        # Likewise, all the upper-level "sub_filter" directives must be
        # repeated here, plus one that keeps wiki links under `/m/`.
        # NOTE(review): the `/wiki/` pattern is plain text and presumably
        #       also matches inside absolute URLs of other hosts --
        #       confirm this is acceptable.
        {{ self.sub_filter_common() }}
        sub_filter  /wiki/  /m/wiki/;
    }

    # Reverse proxy to meta.wikimedia.org
    location ^~ /__wikimedia/meta/ {
        # The `^~` modifier stops the location search here on a match,
        # and the trailing `/` of the `proxy_pass` URL tells Nginx to
        # strip the matched part of the request URI.
        # Credit: https://serverfault.com/a/725433/387898
        proxy_pass  https://meta.wikimedia.org/;

        {{ self.proxy_set_header_common() }}
        proxy_set_header  Referer  https://meta.wikimedia.org;
        proxy_set_header  Host     meta.wikimedia.org;
    }
    # Reverse proxy to upload.wikimedia.org; the matched location prefix
    # is replaced by the `/` URI given to `proxy_pass`.
    location ^~ /__wikimedia/upload/ {
        proxy_pass  https://upload.wikimedia.org/;

        {{ self.proxy_set_header_common() }}
        proxy_set_header  Referer  https://upload.wikimedia.org;
        proxy_set_header  Host     upload.wikimedia.org;
    }

    # Forbid spiders: answer 403 to any request whose User-Agent header
    # matches (case-insensitively, `~*`) one of the crawler names below.
    if ($http_user_agent ~* "qihoobot|Baiduspider|Googlebot|Googlebot-Mobile|Googlebot-Image|Mediapartners-Google|Adsbot-Google|Feedfetcher-Google|Yahoo! Slurp|Yahoo! Slurp China|YoudaoBot|Sosospider|Sogou spider|Sogou web spider|MSNBot|ia_archiver|Tomato Bot") {
        return  403;
    }

    # Serve a local robots.txt that disallows crawling of the whole site.
    # The exact-match modifier (`=`) makes sure that only "/robots.txt"
    # itself is answered here; with a plain prefix location, any path
    # merely starting with "/robots.txt" would get this body as well
    # instead of being proxied.
    location = /robots.txt {
        default_type  text/plain;
        return  200  "User-agent: *\nDisallow: /\n";
    }
}
{% endif %}