Compare commits

...

10 Commits

Author SHA1 Message Date
Henrique Pimentel
465d1c5e74
Merge 3a6cf74f58 into 821d2fc2a3 2024-05-17 23:19:41 +01:00
wxiaoguang
821d2fc2a3
Simplify mirror repository API logic (#30963)
Fix #30921
2024-05-17 16:07:41 +00:00
Henrique Pimentel
3a6cf74f58 Reduced the maxRows limit from 100k to 2.5k 2024-05-14 14:43:55 +01:00
Henrique Pimentel
19be9d2173 Reduced the maxRows limit from 100k to 2.5k 2024-05-14 14:40:12 +01:00
Henrique Pimentel
b8cb3582a2 Small upgrades
- Removed tags from sanitizer rules;
- The warning message now is a table (to reuse UI elements);
- ctx.RelativePath escaped;
- Implemented a panic catch when getting a translation error;
2024-05-14 14:40:12 +01:00
Henrique Pimentel
90030ac7a8 Increased limit to 100k lines, warning message is now on bottom and linked to raw 2024-05-14 14:40:12 +01:00
Henrique Pimentel
145874831e Fix row numbering
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2024-05-14 14:40:12 +01:00
Henrique Pimentel
1cd1c9686d Update MaxRow 2024-05-14 14:40:12 +01:00
Henrique Pimentel
82c48a6cc4 Update warning message design 2024-05-14 14:40:12 +01:00
Henrique Pimentel
943ca1480a Fix CSV rendering (#29663)
Fixes #29663

Previously, when a CSV file was larger than the limit, the render function lost its function to render the code. There were also multiple reads to the file, in order to determine its size and render or pre-render.

This solution implements a new config variable MAX_ROWS, which corresponds to the “Maximum allowed rows to render CSV files. (0 for no limit)” and rewrites the Render function for CSV files in markup module. Now the render function only reads the file once, having MAX_FILE_SIZE+1 as a reader limit and MAX_ROWS as a row limit. When the file is larger than MAX_FILE_SIZE or has more rows than MAX_ROWS, it only renders until the limit, and displays a user-friendly warning informing that the rendered data is not complete, in the user's language.

The warning: ![image](https://s3.amazonaws.com/i.snag.gy/ieROGx.jpg)
2024-05-14 14:40:12 +01:00
7 changed files with 46 additions and 80 deletions

View File

@ -1334,6 +1334,9 @@ LEVEL = Info
;;
;; Maximum allowed file size in bytes to render CSV files as table. (Set to 0 for no limit).
;MAX_FILE_SIZE = 524288
;;
;; Maximum allowed rows to render CSV files. (Set to 0 for no limit)
;MAX_ROWS = 2500
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@ -5,8 +5,7 @@ package markup
import (
"bufio"
"bytes"
"fmt"
"code.gitea.io/gitea/modules/util"
"html"
"io"
"regexp"
@ -15,6 +14,7 @@ import (
"code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation"
)
func init() {
@ -81,86 +81,38 @@ func writeField(w io.Writer, element, class, field string) error {
func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
tmpBlock := bufio.NewWriter(output)
maxSize := setting.UI.CSV.MaxFileSize
maxRows := setting.UI.CSV.MaxRows
if maxSize == 0 {
return r.tableRender(ctx, input, tmpBlock)
if maxSize != 0 {
input = io.LimitReader(input, maxSize+1)
}
rawBytes, err := io.ReadAll(io.LimitReader(input, maxSize+1))
if err != nil {
return err
}
if int64(len(rawBytes)) <= maxSize {
return r.tableRender(ctx, bytes.NewReader(rawBytes), tmpBlock)
}
return r.fallbackRender(io.MultiReader(bytes.NewReader(rawBytes), input), tmpBlock)
}
func (Renderer) fallbackRender(input io.Reader, tmpBlock *bufio.Writer) error {
_, err := tmpBlock.WriteString("<pre>")
if err != nil {
return err
}
scan := bufio.NewScanner(input)
scan.Split(bufio.ScanRunes)
for scan.Scan() {
switch scan.Text() {
case `&`:
_, err = tmpBlock.WriteString("&amp;")
case `'`:
_, err = tmpBlock.WriteString("&#39;") // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
case `<`:
_, err = tmpBlock.WriteString("&lt;")
case `>`:
_, err = tmpBlock.WriteString("&gt;")
case `"`:
_, err = tmpBlock.WriteString("&#34;") // "&#34;" is shorter than "&quot;".
default:
_, err = tmpBlock.Write(scan.Bytes())
}
if err != nil {
return err
}
}
if err = scan.Err(); err != nil {
return fmt.Errorf("fallbackRender scan: %w", err)
}
_, err = tmpBlock.WriteString("</pre>")
if err != nil {
return err
}
return tmpBlock.Flush()
}
func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock *bufio.Writer) error {
rd, err := csv.CreateReaderAndDetermineDelimiter(ctx, input)
if err != nil {
return err
}
if _, err := tmpBlock.WriteString(`<table class="data-table">`); err != nil {
return err
}
row := 1
row := 0
for {
fields, err := rd.Read()
if err == io.EOF {
if err == io.EOF || (row >= maxRows && maxRows != 0) {
break
}
if err != nil {
continue
}
if _, err := tmpBlock.WriteString("<tr>"); err != nil {
return err
}
element := "td"
if row == 1 {
if row == 0 {
element = "th"
}
if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row)); err != nil {
if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row+1)); err != nil {
return err
}
for _, field := range fields {
@ -174,8 +126,32 @@ func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock
row++
}
if _, err = tmpBlock.WriteString("</table>"); err != nil {
return err
}
// Check if maxRows or maxSize is reached, and if true, warn.
if (row >= maxRows && maxRows != 0) || (rd.InputOffset() >= maxSize && maxSize != 0) {
warn := `<table class="data-table"><tr><td>`
rawLink := ` <a href="` + ctx.Links.RawLink() + `/` + util.PathEscapeSegments(ctx.RelativePath) + `">`
// Try to get the user translation
if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok {
warn += locale.TrString("repo.file_too_large")
rawLink += locale.TrString("repo.file_view_raw")
} else {
warn += "The file is too large to be shown."
rawLink += "View Raw"
}
warn += rawLink + `</a></td></tr></table>`
// Write the HTML string to the output
if _, err := tmpBlock.WriteString(warn); err != nil {
return err
}
}
return tmpBlock.Flush()
}

View File

@ -4,8 +4,6 @@
package markup
import (
"bufio"
"bytes"
"strings"
"testing"
@ -31,12 +29,4 @@ func TestRenderCSV(t *testing.T) {
assert.NoError(t, err)
assert.EqualValues(t, v, buf.String())
}
t.Run("fallbackRender", func(t *testing.T) {
var buf bytes.Buffer
err := render.fallbackRender(strings.NewReader("1,<a>\n2,<b>"), bufio.NewWriter(&buf))
assert.NoError(t, err)
want := "<pre>1,&lt;a&gt;\n2,&lt;b&gt;</pre>"
assert.Equal(t, want, buf.String())
})
}

View File

@ -52,6 +52,7 @@ var UI = struct {
CSV struct {
MaxFileSize int64
MaxRows int
} `ini:"ui.csv"`
Admin struct {
@ -107,8 +108,10 @@ var UI = struct {
},
CSV: struct {
MaxFileSize int64
MaxRows int
}{
MaxFileSize: 524288,
MaxRows: 2500,
},
Admin: struct {
UserPagingNum int

View File

@ -217,7 +217,7 @@ type EditRepoOption struct {
Archived *bool `json:"archived,omitempty"`
// set to a string like `8h30m0s` to set the mirror interval time
MirrorInterval *string `json:"mirror_interval,omitempty"`
// enable prune - remove obsolete remote-tracking references
// enable prune - remove obsolete remote-tracking references when mirroring
EnablePrune *bool `json:"enable_prune,omitempty"`
}

View File

@ -1062,16 +1062,10 @@ func updateRepoArchivedState(ctx *context.APIContext, opts api.EditRepoOption) e
func updateMirror(ctx *context.APIContext, opts api.EditRepoOption) error {
repo := ctx.Repo.Repository
// only update mirror if interval or enable prune are provided
if opts.MirrorInterval == nil && opts.EnablePrune == nil {
return nil
}
// these values only make sense if the repo is a mirror
// Skip this update if the repo is not a mirror, do not return error.
// Because reporting errors only makes the logic more complex&fragile, it doesn't really help end users.
if !repo.IsMirror {
err := fmt.Errorf("repo is not a mirror, can not change mirror interval")
ctx.Error(http.StatusUnprocessableEntity, err.Error(), err)
return err
return nil
}
// get the mirror from the repo

View File

@ -20753,7 +20753,7 @@
"x-go-name": "Description"
},
"enable_prune": {
"description": "enable prune - remove obsolete remote-tracking references",
"description": "enable prune - remove obsolete remote-tracking references when mirroring",
"type": "boolean",
"x-go-name": "EnablePrune"
},