mirror of
https://github.com/go-gitea/gitea
synced 2025-01-03 00:45:55 +01:00
Refactor iterate git tree
This commit is contained in:
parent
ec771fdfcd
commit
c7359599c7
@ -5,6 +5,7 @@ package actions
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"context"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -55,7 +56,7 @@ func ListWorkflows(commit *git.Commit) (git.Entries, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
entries, err := tree.ListEntriesRecursiveFast()
|
entries, err := tree.ListEntriesRecursiveFast(context.Background())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -24,8 +24,15 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
|||||||
var sepSpace = []byte{' '}
|
var sepSpace = []byte{' '}
|
||||||
|
|
||||||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
||||||
var err error
|
|
||||||
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
|
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
|
||||||
|
return entries, iterateTreeEntries(data, ptree, func(entry *TreeEntry) error {
|
||||||
|
entries = append(entries, entry)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func iterateTreeEntries(data []byte, ptree *Tree, f func(entry *TreeEntry) error) error {
|
||||||
|
var err error
|
||||||
for pos := 0; pos < len(data); {
|
for pos := 0; pos < len(data); {
|
||||||
// expect line to be of the form:
|
// expect line to be of the form:
|
||||||
// <mode> <type> <sha> <space-padded-size>\t<filename>
|
// <mode> <type> <sha> <space-padded-size>\t<filename>
|
||||||
@ -39,7 +46,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
|||||||
line := data[pos:posEnd]
|
line := data[pos:posEnd]
|
||||||
posTab := bytes.IndexByte(line, '\t')
|
posTab := bytes.IndexByte(line, '\t')
|
||||||
if posTab == -1 {
|
if posTab == -1 {
|
||||||
return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
|
return fmt.Errorf("invalid ls-tree output (no tab): %q", line)
|
||||||
}
|
}
|
||||||
|
|
||||||
entry := new(TreeEntry)
|
entry := new(TreeEntry)
|
||||||
@ -69,27 +76,29 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
|||||||
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
|
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
|
||||||
entry.entryMode = EntryModeTree
|
entry.entryMode = EntryModeTree
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
|
return fmt.Errorf("unknown type: %v", string(entryMode))
|
||||||
}
|
}
|
||||||
|
|
||||||
entry.ID, err = NewIDFromString(string(entryObjectID))
|
entry.ID, err = NewIDFromString(string(entryObjectID))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
|
return fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(entryName) > 0 && entryName[0] == '"' {
|
if len(entryName) > 0 && entryName[0] == '"' {
|
||||||
entry.name, err = strconv.Unquote(string(entryName))
|
entry.name, err = strconv.Unquote(string(entryName))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
|
return fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
entry.name = string(entryName)
|
entry.name = string(entryName)
|
||||||
}
|
}
|
||||||
|
|
||||||
pos = posEnd + 1
|
pos = posEnd + 1
|
||||||
entries = append(entries, entry)
|
if err := f(entry); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return entries, nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) {
|
func catBatchParseTreeEntries(objectFormat ObjectFormat, ptree *Tree, rd *bufio.Reader, sz int64) ([]*TreeEntry, error) {
|
||||||
|
@ -54,11 +54,6 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||||||
|
|
||||||
tree := commit.Tree
|
tree := commit.Tree
|
||||||
|
|
||||||
entries, err := tree.ListEntriesRecursiveWithSize()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
checker, deferable := repo.CheckAttributeReader(commitID)
|
checker, deferable := repo.CheckAttributeReader(commitID)
|
||||||
defer deferable()
|
defer deferable()
|
||||||
|
|
||||||
@ -74,10 +69,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||||||
firstExcludedLanguage := ""
|
firstExcludedLanguage := ""
|
||||||
firstExcludedLanguageSize := int64(0)
|
firstExcludedLanguageSize := int64(0)
|
||||||
|
|
||||||
for _, f := range entries {
|
if err := tree.IterateEntriesWithSize(func(f *TreeEntry) error {
|
||||||
select {
|
select {
|
||||||
case <-repo.Ctx.Done():
|
case <-repo.Ctx.Done():
|
||||||
return sizes, repo.Ctx.Err()
|
return repo.Ctx.Err()
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -85,7 +80,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||||||
content = contentBuf.Bytes()
|
content = contentBuf.Bytes()
|
||||||
|
|
||||||
if f.Size() == 0 {
|
if f.Size() == 0 {
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
isVendored := optional.None[bool]()
|
isVendored := optional.None[bool]()
|
||||||
@ -98,22 +93,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||||||
if err == nil {
|
if err == nil {
|
||||||
isVendored = AttributeToBool(attrs, AttributeLinguistVendored)
|
isVendored = AttributeToBool(attrs, AttributeLinguistVendored)
|
||||||
if isVendored.ValueOrDefault(false) {
|
if isVendored.ValueOrDefault(false) {
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated)
|
isGenerated = AttributeToBool(attrs, AttributeLinguistGenerated)
|
||||||
if isGenerated.ValueOrDefault(false) {
|
if isGenerated.ValueOrDefault(false) {
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation)
|
isDocumentation = AttributeToBool(attrs, AttributeLinguistDocumentation)
|
||||||
if isDocumentation.ValueOrDefault(false) {
|
if isDocumentation.ValueOrDefault(false) {
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable)
|
isDetectable = AttributeToBool(attrs, AttributeLinguistDetectable)
|
||||||
if !isDetectable.ValueOrDefault(true) {
|
if !isDetectable.ValueOrDefault(true) {
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
hasLanguage := TryReadLanguageAttribute(attrs)
|
hasLanguage := TryReadLanguageAttribute(attrs)
|
||||||
@ -128,7 +123,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||||||
|
|
||||||
// this language will always be added to the size
|
// this language will always be added to the size
|
||||||
sizes[language] += f.Size()
|
sizes[language] += f.Size()
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -137,19 +132,19 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||||||
enry.IsDotFile(f.Name()) ||
|
enry.IsDotFile(f.Name()) ||
|
||||||
(!isDocumentation.Has() && enry.IsDocumentation(f.Name())) ||
|
(!isDocumentation.Has() && enry.IsDocumentation(f.Name())) ||
|
||||||
enry.IsConfiguration(f.Name()) {
|
enry.IsConfiguration(f.Name()) {
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// If content can not be read or file is too big just do detection by filename
|
// If content can not be read or file is too big just do detection by filename
|
||||||
|
|
||||||
if f.Size() <= bigFileSize {
|
if f.Size() <= bigFileSize {
|
||||||
if err := writeID(f.ID.String()); err != nil {
|
if err := writeID(f.ID.String()); err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
_, _, size, err := ReadBatchLine(batchReader)
|
_, _, size, err := ReadBatchLine(batchReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err)
|
log.Debug("Error reading blob: %s Err: %v", f.ID.String(), err)
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
sizeToRead := size
|
sizeToRead := size
|
||||||
@ -161,22 +156,22 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||||||
|
|
||||||
_, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead))
|
_, err = contentBuf.ReadFrom(io.LimitReader(batchReader, sizeToRead))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
content = contentBuf.Bytes()
|
content = contentBuf.Bytes()
|
||||||
if err := DiscardFull(batchReader, discard); err != nil {
|
if err := DiscardFull(batchReader, discard); err != nil {
|
||||||
return nil, err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) {
|
if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) {
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
|
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
|
||||||
// - eg. do the all the detection tests using filename first before reading content.
|
// - eg. do the all the detection tests using filename first before reading content.
|
||||||
language := analyze.GetCodeLanguage(f.Name(), content)
|
language := analyze.GetCodeLanguage(f.Name(), content)
|
||||||
if language == "" {
|
if language == "" {
|
||||||
continue
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// group languages, such as Pug -> HTML; SCSS -> CSS
|
// group languages, such as Pug -> HTML; SCSS -> CSS
|
||||||
@ -197,6 +192,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||||||
firstExcludedLanguage = language
|
firstExcludedLanguage = language
|
||||||
firstExcludedLanguageSize += f.Size()
|
firstExcludedLanguageSize += f.Size()
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
|
}); err != nil {
|
||||||
|
return sizes, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// If there are no included languages add the first excluded language
|
// If there are no included languages add the first excluded language
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
package git
|
package git
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
@ -88,34 +90,83 @@ func (t *Tree) ListEntries() (Entries, error) {
|
|||||||
|
|
||||||
// listEntriesRecursive returns all entries of current tree recursively including all subtrees
|
// listEntriesRecursive returns all entries of current tree recursively including all subtrees
|
||||||
// extraArgs could be "-l" to get the size, which is slower
|
// extraArgs could be "-l" to get the size, which is slower
|
||||||
func (t *Tree) listEntriesRecursive(extraArgs TrustedCmdArgs) (Entries, error) {
|
func (t *Tree) listEntriesRecursive(ctx context.Context, extraArgs TrustedCmdArgs) (Entries, error) {
|
||||||
if t.entriesRecursiveParsed {
|
if t.entriesRecursiveParsed {
|
||||||
return t.entriesRecursive, nil
|
return t.entriesRecursive, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-r").
|
t.entriesRecursive = make([]*TreeEntry, 0)
|
||||||
AddArguments(extraArgs...).
|
err := t.iterateEntriesRecursive(func(entry *TreeEntry) error {
|
||||||
AddDynamicArguments(t.ID.String()).
|
select {
|
||||||
RunStdBytes(&RunOpts{Dir: t.repo.Path})
|
case <-ctx.Done():
|
||||||
if runErr != nil {
|
return ctx.Err()
|
||||||
return nil, runErr
|
default:
|
||||||
}
|
}
|
||||||
|
t.entriesRecursive = append(t.entriesRecursive, entry)
|
||||||
var err error
|
return nil
|
||||||
t.entriesRecursive, err = parseTreeEntries(stdout, t)
|
}, extraArgs)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.entriesRecursiveParsed = true
|
t.entriesRecursiveParsed = true
|
||||||
}
|
}
|
||||||
|
|
||||||
return t.entriesRecursive, err
|
return t.entriesRecursive, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
|
// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
|
||||||
func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
|
func (t *Tree) ListEntriesRecursiveFast(ctx context.Context) (Entries, error) {
|
||||||
return t.listEntriesRecursive(nil)
|
return t.listEntriesRecursive(ctx, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size
|
// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size
|
||||||
func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
|
func (t *Tree) ListEntriesRecursiveWithSize(ctx context.Context) (Entries, error) {
|
||||||
return t.listEntriesRecursive(TrustedCmdArgs{"--long"})
|
return t.listEntriesRecursive(ctx, TrustedCmdArgs{"--long"})
|
||||||
|
}
|
||||||
|
|
||||||
|
// iterateEntriesRecursive returns iterate entries of current tree recursively including all subtrees
|
||||||
|
// extraArgs could be "-l" to get the size, which is slower
|
||||||
|
func (t *Tree) iterateEntriesRecursive(f func(entry *TreeEntry) error, extraArgs TrustedCmdArgs) error {
|
||||||
|
if t.entriesRecursiveParsed {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
reader, writer := io.Pipe()
|
||||||
|
done := make(chan error)
|
||||||
|
|
||||||
|
go func(done chan error, writer *io.PipeWriter, reader *io.PipeReader) {
|
||||||
|
runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-r").
|
||||||
|
AddArguments(extraArgs...).
|
||||||
|
AddDynamicArguments(t.ID.String()).
|
||||||
|
Run(&RunOpts{
|
||||||
|
Dir: t.repo.Path,
|
||||||
|
Stdout: writer,
|
||||||
|
})
|
||||||
|
|
||||||
|
_ = writer.Close()
|
||||||
|
_ = reader.Close()
|
||||||
|
|
||||||
|
done <- runErr
|
||||||
|
}(done, writer, reader)
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(reader)
|
||||||
|
for scanner.Scan() {
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
data := scanner.Bytes()
|
||||||
|
if err := iterateTreeEntries(data, t, func(entry *TreeEntry) error {
|
||||||
|
select {
|
||||||
|
case runErr := <-done:
|
||||||
|
return runErr
|
||||||
|
default:
|
||||||
|
return f(entry)
|
||||||
|
}
|
||||||
|
}); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.entriesRecursiveParsed = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Tree) IterateEntriesWithSize(f func(*TreeEntry) error) error {
|
||||||
|
return t.iterateEntriesRecursive(f, TrustedCmdArgs{"--long"})
|
||||||
}
|
}
|
||||||
|
@ -21,7 +21,7 @@ func TreeList(ctx *context.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
entries, err := tree.ListEntriesRecursiveFast()
|
entries, err := tree.ListEntriesRecursiveFast(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
ctx.ServerError("ListEntriesRecursiveFast", err)
|
ctx.ServerError("ListEntriesRecursiveFast", err)
|
||||||
return
|
return
|
||||||
|
@ -28,7 +28,7 @@ func GetTreeBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
|
|||||||
tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
|
tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
|
||||||
var entries git.Entries
|
var entries git.Entries
|
||||||
if recursive {
|
if recursive {
|
||||||
entries, err = gitTree.ListEntriesRecursiveWithSize()
|
entries, err = gitTree.ListEntriesRecursiveWithSize(ctx)
|
||||||
} else {
|
} else {
|
||||||
entries, err = gitTree.ListEntries()
|
entries, err = gitTree.ListEntries()
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user