[feature] list commands for both attachment and emojis (#2121)

* [feature] list commands for both attachment and emojis

* use fewer commands, provide `local-only` and `remote-only` as filters

* envparsing

---------

Co-authored-by: Romain de Laage <romain.delaage@rdelaage.ovh>
Co-authored-by: tsmethurst <tobi.smethurst@protonmail.com>
This commit is contained in:
rdelaage 2023-08-23 18:01:16 +02:00 committed by GitHub
parent 8f38dc2e7f
commit 7b48437f17
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 283 additions and 52 deletions

View file

@ -20,6 +20,7 @@
import (
"bufio"
"context"
"errors"
"fmt"
"os"
"path"
@ -34,14 +35,17 @@
)
type list struct {
dbService db.DB
state *state.State
maxID string
limit int
out *bufio.Writer
dbService db.DB
state *state.State
maxID string
limit int
localOnly bool
remoteOnly bool
out *bufio.Writer
}
func (l *list) GetAllMediaPaths(ctx context.Context, filter func(*gtsmodel.MediaAttachment) string) ([]string, error) {
// Get a list of attachments using a custom filter
func (l *list) GetAllAttachmentPaths(ctx context.Context, filter func(*gtsmodel.MediaAttachment) string) ([]string, error) {
res := make([]string, 0, 100)
for {
attachments, err := l.dbService.GetAttachments(ctx, l.maxID, l.limit)
@ -72,8 +76,52 @@ func (l *list) GetAllMediaPaths(ctx context.Context, filter func(*gtsmodel.Media
return res, nil
}
// GetAllEmojisPaths pages through the emojis returned by the database
// service and collects the non-empty strings produced by filter.
//
// filter is called once for every emoji in every batch; returning ""
// excludes that emoji from the result. Paging state is kept in l.maxID,
// which is advanced to the ID of the last emoji of each full batch, so
// the receiver is mutated by this call.
//
// It returns the collected strings, or a wrapped error as soon as one
// of the database queries fails.
func (l *list) GetAllEmojisPaths(ctx context.Context, filter func(*gtsmodel.Emoji) string) ([]string, error) {
	res := make([]string, 0, 100)

	for {
		emojis, err := l.dbService.GetEmojis(ctx, l.maxID, l.limit)
		if err != nil {
			return nil, fmt.Errorf("failed to retrieve media metadata from database: %w", err)
		}

		for _, e := range emojis {
			if v := filter(e); v != "" {
				res = append(res, v)
			}
		}

		// If we got less results than our limit, we've reached the
		// last page to retrieve and we can break the loop. If the
		// last batch happens to contain exactly the same amount of
		// items as the limit we'll end up doing one extra query.
		//
		// The explicit empty check matters when l.limit is zero or
		// negative: the comparison against the limit can then never
		// be true, and indexing the last element of an empty batch
		// below would panic.
		if len(emojis) == 0 || len(emojis) < l.limit {
			break
		}

		// Grab the last ID from the batch and set it as the maxID
		// that'll be used in the next iteration so we don't get items
		// we've already seen.
		l.maxID = emojis[len(emojis)-1].ID
	}

	return res, nil
}
func setupList(ctx context.Context) (*list, error) {
var state state.State
var (
localOnly = config.GetAdminMediaListLocalOnly()
remoteOnly = config.GetAdminMediaListRemoteOnly()
state state.State
)
// Validate flags.
if localOnly && remoteOnly {
return nil, errors.New(
"local-only and remote-only flags cannot be true at the same time; " +
"choose one or the other, or set neither to list all media",
)
}
state.Caches.Init()
state.Caches.Start()
@ -87,11 +135,13 @@ func setupList(ctx context.Context) (*list, error) {
state.DB = dbService
return &list{
dbService: dbService,
state: &state,
limit: 200,
maxID: "",
out: bufio.NewWriter(os.Stdout),
dbService: dbService,
state: &state,
limit: 200,
maxID: "",
localOnly: localOnly,
remoteOnly: remoteOnly,
out: bufio.NewWriter(os.Stdout),
}, nil
}
@ -103,7 +153,8 @@ func (l *list) shutdown() error {
return err
}
var ListLocal action.GTSAction = func(ctx context.Context) error {
// ListAttachments lists local, remote, or all attachment paths.
var ListAttachments action.GTSAction = func(ctx context.Context) error {
list, err := setupList(ctx)
if err != nil {
return err
@ -116,26 +167,53 @@ func (l *list) shutdown() error {
}
}()
mediaPath := config.GetStorageLocalBasePath()
media, err := list.GetAllMediaPaths(
ctx,
func(m *gtsmodel.MediaAttachment) string {
if m.RemoteURL == "" {
return path.Join(mediaPath, m.File.Path)
var (
mediaPath = config.GetStorageLocalBasePath()
filter func(*gtsmodel.MediaAttachment) string
)
switch {
case list.localOnly:
filter = func(m *gtsmodel.MediaAttachment) string {
if m.RemoteURL != "" {
// Remote, not
// interested.
return ""
}
return ""
})
return path.Join(mediaPath, m.File.Path)
}
case list.remoteOnly:
filter = func(m *gtsmodel.MediaAttachment) string {
if m.RemoteURL == "" {
// Local, not
// interested.
return ""
}
return path.Join(mediaPath, m.File.Path)
}
default:
filter = func(m *gtsmodel.MediaAttachment) string {
return path.Join(mediaPath, m.File.Path)
}
}
attachments, err := list.GetAllAttachmentPaths(ctx, filter)
if err != nil {
return err
}
for _, m := range media {
_, _ = list.out.WriteString(m + "\n")
for _, a := range attachments {
_, _ = list.out.WriteString(a + "\n")
}
return nil
}
var ListRemote action.GTSAction = func(ctx context.Context) error {
// ListEmojis lists local, remote, or all emoji filepaths.
var ListEmojis action.GTSAction = func(ctx context.Context) error {
list, err := setupList(ctx)
if err != nil {
return err
@ -148,17 +226,47 @@ func(m *gtsmodel.MediaAttachment) string {
}
}()
media, err := list.GetAllMediaPaths(
ctx,
func(m *gtsmodel.MediaAttachment) string {
return m.RemoteURL
})
var (
mediaPath = config.GetStorageLocalBasePath()
filter func(*gtsmodel.Emoji) string
)
switch {
case list.localOnly:
filter = func(e *gtsmodel.Emoji) string {
if e.ImageRemoteURL != "" {
// Remote, not
// interested.
return ""
}
return path.Join(mediaPath, e.ImagePath)
}
case list.remoteOnly:
filter = func(e *gtsmodel.Emoji) string {
if e.ImageRemoteURL == "" {
// Local, not
// interested.
return ""
}
return path.Join(mediaPath, e.ImagePath)
}
default:
filter = func(e *gtsmodel.Emoji) string {
return path.Join(mediaPath, e.ImagePath)
}
}
emojis, err := list.GetAllEmojisPaths(ctx, filter)
if err != nil {
return err
}
for _, m := range media {
_, _ = list.out.WriteString(m + "\n")
for _, e := range emojis {
_, _ = list.out.WriteString(e + "\n")
}
return nil
}

View file

@ -178,29 +178,31 @@ func adminCommands() *cobra.Command {
ADMIN MEDIA LIST COMMANDS
*/
adminMediaListLocalCmd := &cobra.Command{
Use: "list-local",
Short: "admin command to list media on local storage",
adminMediaListAttachmentsCmd := &cobra.Command{
Use: "list-attachments",
Short: "list local, remote, or all attachments",
PreRunE: func(cmd *cobra.Command, args []string) error {
return preRun(preRunArgs{cmd: cmd})
},
RunE: func(cmd *cobra.Command, args []string) error {
return run(cmd.Context(), media.ListLocal)
return run(cmd.Context(), media.ListAttachments)
},
}
config.AddAdminMediaList(adminMediaListAttachmentsCmd)
adminMediaCmd.AddCommand(adminMediaListAttachmentsCmd)
adminMediaListRemoteCmd := &cobra.Command{
Use: "list-remote",
Short: "admin command to list remote media cached on this instance",
adminMediaListEmojisLocalCmd := &cobra.Command{
Use: "list-emojis",
Short: "list local, remote, or all emojis",
PreRunE: func(cmd *cobra.Command, args []string) error {
return preRun(preRunArgs{cmd: cmd})
},
RunE: func(cmd *cobra.Command, args []string) error {
return run(cmd.Context(), media.ListRemote)
return run(cmd.Context(), media.ListEmojis)
},
}
adminMediaCmd.AddCommand(adminMediaListLocalCmd, adminMediaListRemoteCmd)
config.AddAdminMediaList(adminMediaListEmojisLocalCmd)
adminMediaCmd.AddCommand(adminMediaListEmojisLocalCmd)
/*
ADMIN MEDIA PRUNE COMMANDS

View file

@ -255,17 +255,73 @@ Example:
gotosocial admin import --path example.json --config-path config.yaml
```
### gotosocial admin media list-local
### gotosocial admin media list-attachments
This command can be used to list local media. Local media is media that belongs to posts by users with an account on the instance.
Can be used to list the storage paths of local, remote, or all media attachments on your instance (including headers and avatars).
The output will be a list of files. The list can be used to drive your backups.
`local-only` and `remote-only` can be used as filters; they cannot both be set at once.
### gotosocial admin media list-remote
If neither `local-only` nor `remote-only` is set, all media attachments on your instance will be listed.
This is the corollary to list-local, but instead lists media from remote instances. Remote media belongs to other instances, but was attached to a post we received over federation and have potentially cached locally.
You may want to run this with `GTS_LOG_LEVEL` set to `warn` or `error`, otherwise it will log a lot of info messages you probably don't need.
The output will be a list of URLs to retrieve the original content from. GoToSocial automatically retrieves remote media when it needs it, so you should never need to do so yourself.
`gotosocial admin media list-attachments --help`:
```text
list local, remote, or all attachments
Usage:
gotosocial admin media list-attachments [flags]
Flags:
-h, --help help for list-attachments
--local-only list only local attachments/emojis; if specified then remote-only cannot also be true
--remote-only list only remote attachments/emojis; if specified then local-only cannot also be true
```
Example output:
```text
/gotosocial/062G5WYKY35KKD12EMSM3F8PJ8/attachment/original/01PFPMWK2FF0D9WMHEJHR07C3R.jpg
/gotosocial/01F8MH1H7YV1Z7D2C8K2730QBF/attachment/original/01PFPMWK2FF0D9WMHEJHR07C3Q.jpg
/gotosocial/01F8MH5ZK5VRH73AKHQM6Y9VNX/attachment/original/01FVW7RXPQ8YJHTEXYPE7Q8ZY0.jpg
/gotosocial/01F8MH1H7YV1Z7D2C8K2730QBF/attachment/original/01F8MH8RMYQ6MSNY3JM2XT1CQ5.jpg
/gotosocial/01F8MH1H7YV1Z7D2C8K2730QBF/attachment/original/01F8MH7TDVANYKWVE8VVKFPJTJ.gif
/gotosocial/01F8MH17FWEB39HZJ76B6VXSKF/attachment/original/01F8MH6NEM8D7527KZAECTCR76.jpg
/gotosocial/01F8MH1H7YV1Z7D2C8K2730QBF/attachment/original/01F8MH58A357CV5K7R7TJMSH6S.jpg
/gotosocial/01F8MH1H7YV1Z7D2C8K2730QBF/attachment/original/01CDR64G398ADCHXK08WWTHEZ5.gif
```
### gotosocial admin media list-emojis
Can be used to list the storage paths of local, remote, or all emojis on your instance.
`local-only` and `remote-only` can be used as filters; they cannot both be set at once.
If neither `local-only` nor `remote-only` is set, all emojis on your instance will be listed.
You may want to run this with `GTS_LOG_LEVEL` set to `warn` or `error`, otherwise it will log a lot of info messages you probably don't need.
`gotosocial admin media list-emojis --help`:
```text
list local, remote, or all emojis
Usage:
gotosocial admin media list-emojis [flags]
Flags:
-h, --help help for list-emojis
--local-only list only local attachments/emojis; if specified then remote-only cannot also be true
--remote-only list only remote attachments/emojis; if specified then local-only cannot also be true
```
Example output:
```text
/gotosocial/01AY6P665V14JJR0AFVRT7311Y/emoji/original/01GD5KP5CQEE1R3X43Y1EHS2CW.png
/gotosocial/01AY6P665V14JJR0AFVRT7311Y/emoji/original/01F8MH9H8E4VG3KDYJR9EGPXCQ.png
```
### gotosocial admin media prune orphaned

View file

@ -161,11 +161,13 @@ type Configuration struct {
Cache CacheConfiguration `name:"cache"`
// TODO: move these elsewhere, these are more ephemeral vs long-running flags like above
AdminAccountUsername string `name:"username" usage:"the username to create/delete/etc"`
AdminAccountEmail string `name:"email" usage:"the email address of this account"`
AdminAccountPassword string `name:"password" usage:"the password to set for this account"`
AdminTransPath string `name:"path" usage:"the path of the file to import from/export to"`
AdminMediaPruneDryRun bool `name:"dry-run" usage:"perform a dry run and only log number of items eligible for pruning"`
AdminAccountUsername string `name:"username" usage:"the username to create/delete/etc"`
AdminAccountEmail string `name:"email" usage:"the email address of this account"`
AdminAccountPassword string `name:"password" usage:"the password to set for this account"`
AdminTransPath string `name:"path" usage:"the path of the file to import from/export to"`
AdminMediaPruneDryRun bool `name:"dry-run" usage:"perform a dry run and only log number of items eligible for pruning"`
AdminMediaListLocalOnly bool `name:"local-only" usage:"list only local attachments/emojis; if specified then remote-only cannot also be true"`
AdminMediaListRemoteOnly bool `name:"remote-only" usage:"list only remote attachments/emojis; if specified then local-only cannot also be true"`
RequestIDHeader string `name:"request-id-header" usage:"Header to extract the Request ID from. Eg.,'X-Request-Id'."`
}

View file

@ -203,6 +203,17 @@ func AddAdminTrans(cmd *cobra.Command) {
}
}
// AddAdminMediaList registers the boolean local-only and remote-only
// flags on cmd. Both default to false; their help text is read from the
// "usage" tag of the matching Configuration field.
func AddAdminMediaList(cmd *cobra.Command) {
	cmd.Flags().Bool(
		AdminMediaListLocalOnlyFlag(),
		false,
		fieldtag("AdminMediaListLocalOnly", "usage"),
	)

	cmd.Flags().Bool(
		AdminMediaListRemoteOnlyFlag(),
		false,
		fieldtag("AdminMediaListRemoteOnly", "usage"),
	)
}
// AddAdminMediaPrune attaches flags pertaining to media storage prune commands.
func AddAdminMediaPrune(cmd *cobra.Command) {
name := AdminMediaPruneDryRunFlag()

View file

@ -3374,6 +3374,56 @@ func GetAdminMediaPruneDryRun() bool { return global.GetAdminMediaPruneDryRun()
// SetAdminMediaPruneDryRun safely sets the value for global configuration 'AdminMediaPruneDryRun' field
func SetAdminMediaPruneDryRun(v bool) { global.SetAdminMediaPruneDryRun(v) }
// GetAdminMediaListLocalOnly returns the state's 'AdminMediaListLocalOnly'
// configuration value, read while holding the state's read lock.
func (st *ConfigState) GetAdminMediaListLocalOnly() (v bool) {
	st.mutex.RLock()
	defer st.mutex.RUnlock()
	return st.config.AdminMediaListLocalOnly
}
// SetAdminMediaListLocalOnly safely sets the Configuration value for state's 'AdminMediaListLocalOnly' field.
//
// The write lock on st.mutex is taken first and released by the deferred
// unlock when the method returns, so both the assignment and the
// st.reloadToViper() call run while the lock is held.
func (st *ConfigState) SetAdminMediaListLocalOnly(v bool) {
st.mutex.Lock()
defer st.mutex.Unlock()
st.config.AdminMediaListLocalOnly = v
// NOTE(review): presumably pushes the updated config back into viper — confirm against reloadToViper.
st.reloadToViper()
}
// AdminMediaListLocalOnlyFlag reports the command-line flag name that
// corresponds to the 'AdminMediaListLocalOnly' field.
func AdminMediaListLocalOnlyFlag() string {
	const name = "local-only"
	return name
}
// GetAdminMediaListLocalOnly returns the 'AdminMediaListLocalOnly' value
// by delegating to the package-level global configuration state.
func GetAdminMediaListLocalOnly() bool {
	return global.GetAdminMediaListLocalOnly()
}
// SetAdminMediaListLocalOnly stores v as the 'AdminMediaListLocalOnly'
// value by delegating to the package-level global configuration state.
func SetAdminMediaListLocalOnly(v bool) {
	global.SetAdminMediaListLocalOnly(v)
}
// GetAdminMediaListRemoteOnly returns the state's 'AdminMediaListRemoteOnly'
// configuration value, read while holding the state's read lock.
func (st *ConfigState) GetAdminMediaListRemoteOnly() (v bool) {
	st.mutex.RLock()
	defer st.mutex.RUnlock()
	return st.config.AdminMediaListRemoteOnly
}
// SetAdminMediaListRemoteOnly safely sets the Configuration value for state's 'AdminMediaListRemoteOnly' field.
//
// The write lock on st.mutex is taken first and released by the deferred
// unlock when the method returns, so both the assignment and the
// st.reloadToViper() call run while the lock is held.
func (st *ConfigState) SetAdminMediaListRemoteOnly(v bool) {
st.mutex.Lock()
defer st.mutex.Unlock()
st.config.AdminMediaListRemoteOnly = v
// NOTE(review): presumably pushes the updated config back into viper — confirm against reloadToViper.
st.reloadToViper()
}
// AdminMediaListRemoteOnlyFlag reports the command-line flag name that
// corresponds to the 'AdminMediaListRemoteOnly' field.
func AdminMediaListRemoteOnlyFlag() string {
	const name = "remote-only"
	return name
}
// GetAdminMediaListRemoteOnly returns the 'AdminMediaListRemoteOnly' value
// by delegating to the package-level global configuration state.
func GetAdminMediaListRemoteOnly() bool {
	return global.GetAdminMediaListRemoteOnly()
}
// SetAdminMediaListRemoteOnly stores v as the 'AdminMediaListRemoteOnly'
// value by delegating to the package-level global configuration state.
func SetAdminMediaListRemoteOnly(v bool) {
	global.SetAdminMediaListRemoteOnly(v)
}
// GetRequestIDHeader safely fetches the Configuration value for state's 'RequestIDHeader' field
func (st *ConfigState) GetRequestIDHeader() (v string) {
st.mutex.RLock()

View file

@ -87,6 +87,7 @@ EXPECT=$(cat << "EOF"
"letsencrypt-email-address": "",
"letsencrypt-enabled": true,
"letsencrypt-port": 80,
"local-only": false,
"log-client-ip": false,
"log-db-queries": true,
"log-level": "info",
@ -116,6 +117,7 @@ EXPECT=$(cat << "EOF"
"path": "",
"port": 6969,
"protocol": "http",
"remote-only": false,
"request-id-header": "X-Trace-Id",
"smtp-disclose-recipients": true,
"smtp-from": "queen.rip.in.piss@terfisland.org",