Richard Mahn 40c8451b7d
Properly determine CSV delimiter (#17459)
* Fixes #16558 CSV delimiter determiner

* Fixes #16558 - properly determine CSV delmiiter

* Moves quoteString to a new function

* Adds big test with lots of commas for tab delimited csv

* Adds comments

* Shortens the text of the test

* Removes single quotes from regexp as only double quotes need to be searched

* Fixes spelling

* Fixes check of length as it probalby will only be 1e4, not greater

* Makes sample size a const, properly removes truncated line

* Makes sample size a const, properly removes truncated line

* Fixes comment

* Fixes comment

* tests for FormatError() function

* Adds logic to find the limiter before or after a quoted value

* Simplifies regex

* Error tests

* Error tests

* Update modules/csv/csv.go

Co-authored-by: delvh <dev.lh@web.de>

* Update modules/csv/csv.go

Co-authored-by: delvh <dev.lh@web.de>

* Adds comments

* Update modules/csv/csv.go

Co-authored-by: delvh <dev.lh@web.de>

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: delvh <dev.lh@web.de>
2021-10-30 23:50:40 +08:00

148 lines
3.4 KiB
Go

// Copyright 2018 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package markup
import (
"bufio"
"bytes"
"html"
"io"
"regexp"
"strconv"
"code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
)
func init() {
markup.RegisterRenderer(Renderer{})
}
// Renderer implements markup.Renderer for csv files
type Renderer struct {
}
// Name implements markup.Renderer
func (Renderer) Name() string {
return "csv"
}
// NeedPostProcess implements markup.Renderer
func (Renderer) NeedPostProcess() bool { return false }
// Extensions implements markup.Renderer
func (Renderer) Extensions() []string {
return []string{".csv", ".tsv"}
}
// SanitizerRules implements markup.Renderer
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
return []setting.MarkupSanitizerRule{
{Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)},
{Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
{Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
}
}
func writeField(w io.Writer, element, class, field string) error {
if _, err := io.WriteString(w, "<"); err != nil {
return err
}
if _, err := io.WriteString(w, element); err != nil {
return err
}
if len(class) > 0 {
if _, err := io.WriteString(w, " class=\""); err != nil {
return err
}
if _, err := io.WriteString(w, class); err != nil {
return err
}
if _, err := io.WriteString(w, "\""); err != nil {
return err
}
}
if _, err := io.WriteString(w, ">"); err != nil {
return err
}
if _, err := io.WriteString(w, html.EscapeString(field)); err != nil {
return err
}
if _, err := io.WriteString(w, "</"); err != nil {
return err
}
if _, err := io.WriteString(w, element); err != nil {
return err
}
_, err := io.WriteString(w, ">")
return err
}
// Render implements markup.Renderer
func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
var tmpBlock = bufio.NewWriter(output)
// FIXME: don't read all to memory
rawBytes, err := io.ReadAll(input)
if err != nil {
return err
}
if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) {
if _, err := tmpBlock.WriteString("<pre>"); err != nil {
return err
}
if _, err := tmpBlock.WriteString(html.EscapeString(string(rawBytes))); err != nil {
return err
}
_, err = tmpBlock.WriteString("</pre>")
return err
}
rd, err := csv.CreateReaderAndDetermineDelimiter(ctx, bytes.NewReader(rawBytes))
if err != nil {
return err
}
if _, err := tmpBlock.WriteString(`<table class="data-table">`); err != nil {
return err
}
row := 1
for {
fields, err := rd.Read()
if err == io.EOF {
break
}
if err != nil {
continue
}
if _, err := tmpBlock.WriteString("<tr>"); err != nil {
return err
}
element := "td"
if row == 1 {
element = "th"
}
if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row)); err != nil {
return err
}
for _, field := range fields {
if err := writeField(tmpBlock, element, "", field); err != nil {
return err
}
}
if _, err := tmpBlock.WriteString("</tr>"); err != nil {
return err
}
row++
}
if _, err = tmpBlock.WriteString("</table>"); err != nil {
return err
}
return tmpBlock.Flush()
}