feat: include a default robots.txt to reduce the impact of crawlers (#7387)
- Add a strict default robots.txt, served when one is not provided by the instance administrators.
- Remove the code for the legacy public asset path; the error has been logged for a few releases already (it has existed since v1.21).
- Resolves forgejo/forgejo#923

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7387
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: 0ko <0ko@noreply.codeberg.org>
Co-authored-by: Gusted <postmaster@gusted.xyz>
Co-committed-by: Gusted <postmaster@gusted.xyz>
This commit is contained in:
parent 51caba694a
commit bb4e1f426f

2 changed files with 88 additions and 12 deletions
@@ -198,9 +198,6 @@ func serveInstalled(ctx *cli.Context) error {
 	for fn := range publicFilesSet.Seq() {
 		log.Error("Found legacy public asset %q in CustomPath. Please move it to %s/public/assets/%s", fn, setting.CustomPath, fn)
 	}
-	if _, err := os.Stat(filepath.Join(setting.CustomPath, "robots.txt")); err == nil {
-		log.Error(`Found legacy public asset "robots.txt" in CustomPath. Please move it to %s/public/robots.txt`, setting.CustomPath)
-	}
 
 	routers.InitWebInstalled(graceful.GetManager().HammerContext())
@@ -33,17 +33,96 @@ func DummyOK(w http.ResponseWriter, req *http.Request) {
 	w.WriteHeader(http.StatusOK)
 }
 
-func RobotsTxt(w http.ResponseWriter, req *http.Request) {
-	robotsTxt := util.FilePathJoinAbs(setting.CustomPath, "public/robots.txt")
-	if ok, _ := util.IsExist(robotsTxt); !ok {
-		robotsTxt = util.FilePathJoinAbs(setting.CustomPath, "robots.txt") // the legacy "robots.txt"
-	}
-	httpcache.SetCacheControlInHeader(w.Header(), setting.StaticCacheTime)
-	http.ServeFile(w, req, robotsTxt)
-}
-
 func StaticRedirect(target string) func(w http.ResponseWriter, req *http.Request) {
 	return func(w http.ResponseWriter, req *http.Request) {
 		http.Redirect(w, req, path.Join(setting.StaticURLPrefix, target), http.StatusMovedPermanently)
 	}
 }
+
+var defaultRobotsTxt = []byte(`# The default Forgejo robots.txt
+# For more information: https://forgejo.org/docs/latest/admin/search-engines-indexation/
+
+User-agent: *
+Disallow: /api/
+Disallow: /avatars/
+Disallow: /user/
+Disallow: /swagger.*.json
+Disallow: /explore/*?*
+
+Disallow: /repo/create
+Disallow: /repo/migrate
+Disallow: /org/create
+Disallow: /*/*/fork
+
+Disallow: /*/*/watchers
+Disallow: /*/*/stargazers
+Disallow: /*/*/forks
+
+Disallow: /*/*/src/
+Disallow: /*/*/blame/
+Disallow: /*/*/commit/
+Disallow: /*/*/commits/
+Disallow: /*/*/raw/
+Disallow: /*/*/media/
+Disallow: /*/*/tags
+Disallow: /*/*/graph
+Disallow: /*/*/branches
+Disallow: /*/*/compare
+Disallow: /*/*/lastcommit/
+Disallow: /*/*/rss/branch/
+Disallow: /*/*/atom/branch/
+
+Disallow: /*/*/activity
+Disallow: /*/*/activity_author_data
+
+Disallow: /*/*/actions
+Disallow: /*/*/projects
+Disallow: /*/*/labels
+Disallow: /*/*/milestones
+
+Disallow: /*/*/find/
+Disallow: /*/*/tree-list/
+Disallow: /*/*/search/
+Disallow: /*/-/code
+
+Disallow: /*/*/issues/new
+Disallow: /*/*/pulls/*/files
+Disallow: /*/*/pulls/*/commits
+
+Disallow: /attachments/
+Disallow: /*/*/attachments/
+Disallow: /*/*/issues/*/attachments/
+Disallow: /*/*/pulls/*/attachments/
+Disallow: /*/*/releases/attachments
+Disallow: /*/*/releases/download
+
+Disallow: /*/*/archive/
+Disallow: /*.bundle$
+Disallow: /*.patch$
+Disallow: /*.diff$
+Disallow: /*.atom$
+Disallow: /*.rss$
+
+Disallow: /*lang=*
+Disallow: /*redirect_to=*
+Disallow: /*tab=*
+Disallow: /*q=*
+Disallow: /*sort=*
+Disallow: /*repo-search-archived=*
+`)
+
+func RobotsTxt(w http.ResponseWriter, req *http.Request) {
+	httpcache.SetCacheControlInHeader(w.Header(), setting.StaticCacheTime)
+	w.Header().Set("Content-Type", "text/plain")
+
+	robotsTxt := util.FilePathJoinAbs(setting.CustomPath, "public/robots.txt")
+	if ok, _ := util.IsExist(robotsTxt); ok {
+		http.ServeFile(w, req, robotsTxt)
+		return
+	}
+
+	_, err := w.Write(defaultRobotsTxt)
+	if err != nil {
+		log.Error("failed to write robots.txt: %v", err)
+	}
+}
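For readers who want to try the new precedence rule in isolation, below is a minimal, self-contained sketch of the same logic using only the Go standard library. It is an illustration distilled from the diff above, not Forgejo's actual code: robotsTxt, fallback, and the temporary directory are stand-ins for RobotsTxt, defaultRobotsTxt, and setting.CustomPath.

package main

import (
	"fmt"
	"io"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
)

// fallback stands in for Forgejo's embedded defaultRobotsTxt.
var fallback = []byte("User-agent: *\nDisallow: /api/\n")

// robotsTxt mirrors the handler above: serve the administrator's
// <customPath>/public/robots.txt when it exists, otherwise write
// the built-in default.
func robotsTxt(customPath string) http.HandlerFunc {
	return func(w http.ResponseWriter, req *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		custom := filepath.Join(customPath, "public", "robots.txt")
		if _, err := os.Stat(custom); err == nil {
			http.ServeFile(w, req, custom)
			return
		}
		if _, err := w.Write(fallback); err != nil {
			fmt.Println("failed to write robots.txt:", err)
		}
	}
}

func main() {
	dir, err := os.MkdirTemp("", "robots")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	srv := httptest.NewServer(robotsTxt(dir))
	defer srv.Close()

	fetch := func() string {
		resp, err := http.Get(srv.URL + "/robots.txt")
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()
		body, _ := io.ReadAll(resp.Body)
		return string(body)
	}

	// No custom file yet: the built-in default is served.
	fmt.Print("default:\n", fetch())

	// Drop in an administrator override; it now takes precedence.
	if err := os.MkdirAll(filepath.Join(dir, "public"), 0o755); err != nil {
		panic(err)
	}
	if err := os.WriteFile(filepath.Join(dir, "public", "robots.txt"),
		[]byte("User-agent: *\nDisallow: /\n"), 0o644); err != nil {
		panic(err)
	}
	fmt.Print("custom:\n", fetch())
}

The behavioral change the sketch demonstrates matches the diff: a missing custom file now yields the embedded default instead of falling back to the legacy CustomPath/robots.txt location.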