Files
smart-search-back/pkg/fileparser/parser_test.go
vallyenfail 7e73144486
All checks were successful
Deploy Smart Search Backend Test / deploy (push) Successful in 1m44s
add service
2026-01-20 14:26:27 +03:00

111 lines
3.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package fileparser
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestExtractText_EmptyData checks that ExtractText reports no error and an
// empty result both for a nil payload and for an allocated zero-length slice.
func TestExtractText_EmptyData(t *testing.T) {
	const name = "test.txt"

	// Case 1: no payload at all.
	fromNil, nilErr := ExtractText(nil, name)
	assert.NoError(t, nilErr)
	assert.Empty(t, fromNil)

	// Case 2: a non-nil slice that holds zero bytes.
	fromEmpty, emptyErr := ExtractText([]byte{}, name)
	assert.NoError(t, emptyErr)
	assert.Empty(t, fromEmpty)
}
// TestExtractText_PlainText checks that a single-line Cyrillic text payload is
// returned by ExtractText unchanged and without an error.
func TestExtractText_PlainText(t *testing.T) {
	const want = "Тестовый текст для проверки"

	got, err := ExtractText([]byte(want), "document.txt")

	assert.NoError(t, err)
	assert.Equal(t, want, got)
}
// TestExtractText_PlainTextWithNewlines checks that a three-line text payload
// is returned by ExtractText with its "\n" separators intact.
func TestExtractText_PlainTextWithNewlines(t *testing.T) {
	const want = "Первая строка\nВторая строка\nТретья строка"

	got, err := ExtractText([]byte(want), "document.txt")

	assert.NoError(t, err)
	assert.Equal(t, want, got)
}
// TestExtractText_RealDocxFile feeds the docx fixture from testdata/ to
// ExtractText under a Cyrillic file name and expects non-empty text back.
// The extracted text is logged to help inspect the fixture's content.
func TestExtractText_RealDocxFile(t *testing.T) {
	// Without the fixture nothing below is meaningful, so abort on read failure.
	raw, readErr := os.ReadFile(filepath.Join("testdata", "test_document.docx"))
	require.NoError(t, readErr, "не удалось прочитать тестовый файл")

	text, extractErr := ExtractText(raw, "тестовый.docx")
	assert.NoError(t, extractErr)
	assert.NotEmpty(t, text, "текст из docx не должен быть пустым")

	t.Logf("Извлеченный текст из docx:\n%s", text)
}
// TestExtractText_DocxWithAnyFilename passes the same docx fixture to
// ExtractText under two unrelated file names (one without an extension, one
// ending in .pdf) and expects identical, non-empty text from both calls.
func TestExtractText_DocxWithAnyFilename(t *testing.T) {
	raw, err := os.ReadFile(filepath.Join("testdata", "test_document.docx"))
	require.NoError(t, err)

	// File name carries no extension at all.
	noExtText, err := ExtractText(raw, "random_name_without_extension")
	assert.NoError(t, err)
	assert.NotEmpty(t, noExtText)

	// File name carries an extension that does not match the content.
	pdfNamedText, err := ExtractText(raw, "document.pdf")
	assert.NoError(t, err)
	assert.NotEmpty(t, pdfNamedText)

	assert.Equal(t, noExtText, pdfNamedText, "результат должен быть одинаковым независимо от имени файла")
}
// TestExtractText_UnsupportedFormat_PDF checks that a payload starting with a
// PDF header is rejected: ExtractText must return an error whose message
// contains "UNSUPPORTED_FILE_FORMAT" together with an empty result.
func TestExtractText_UnsupportedFormat_PDF(t *testing.T) {
	pdfHeader := []byte("%PDF-1.4\n")

	result, err := ExtractText(pdfHeader, "document.pdf")

	// require (not assert) stops the test here when err is nil; otherwise the
	// err.Error() call below would panic on a nil error instead of producing
	// a readable test failure.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "UNSUPPORTED_FILE_FORMAT")
	assert.Empty(t, result)
}
// TestExtractText_UnsupportedFormat_Image checks that a payload consisting of
// the 8-byte PNG signature is rejected: ExtractText must return an error whose
// message contains "UNSUPPORTED_FILE_FORMAT" together with an empty result.
func TestExtractText_UnsupportedFormat_Image(t *testing.T) {
	pngHeader := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}

	result, err := ExtractText(pngHeader, "image.png")

	// require (not assert) stops the test here when err is nil; otherwise the
	// err.Error() call below would panic on a nil error instead of producing
	// a readable test failure.
	require.Error(t, err)
	assert.Contains(t, err.Error(), "UNSUPPORTED_FILE_FORMAT")
	assert.Empty(t, result)
}
// TestExtractText_InvalidDocx builds a payload that begins with the ZIP
// signature bytes but continues with arbitrary text, and expects ExtractText
// to return an error and an empty result for it.
func TestExtractText_InvalidDocx(t *testing.T) {
	// "PK\x03\x04" signature followed by bytes that do not form an archive.
	payload := append([]byte{0x50, 0x4B, 0x03, 0x04}, "not a valid zip content"...)

	text, err := ExtractText(payload, "fake.docx")

	assert.Error(t, err)
	assert.Empty(t, text)
}
// TestIsDocx_ValidDocx checks that isDocx reports true for the docx fixture
// stored under testdata/.
func TestIsDocx_ValidDocx(t *testing.T) {
	raw, readErr := os.ReadFile(filepath.Join("testdata", "test_document.docx"))
	require.NoError(t, readErr)

	assert.True(t, isDocx(raw))
}
func TestIsDocx_RegularZip(t *testing.T) {
zipHeader := []byte{0x50, 0x4B, 0x03, 0x04}
assert.False(t, isDocx(zipHeader))
}
func TestIsDocx_NotZip(t *testing.T) {
textData := []byte("plain text content")
assert.False(t, isDocx(textData))
}