All checks were successful
Deploy Smart Search Backend Test / deploy (push) Successful in 1m44s
111 lines
3.2 KiB
Go
111 lines
3.2 KiB
Go
package fileparser
|
||
|
||
import (
|
||
"os"
|
||
"path/filepath"
|
||
"testing"
|
||
|
||
"github.com/stretchr/testify/assert"
|
||
"github.com/stretchr/testify/require"
|
||
)
|
||
|
||
func TestExtractText_EmptyData(t *testing.T) {
|
||
result, err := ExtractText(nil, "test.txt")
|
||
assert.NoError(t, err)
|
||
assert.Empty(t, result)
|
||
|
||
result, err = ExtractText([]byte{}, "test.txt")
|
||
assert.NoError(t, err)
|
||
assert.Empty(t, result)
|
||
}
|
||
|
||
func TestExtractText_PlainText(t *testing.T) {
|
||
content := "Тестовый текст для проверки"
|
||
result, err := ExtractText([]byte(content), "document.txt")
|
||
|
||
assert.NoError(t, err)
|
||
assert.Equal(t, content, result)
|
||
}
|
||
|
||
func TestExtractText_PlainTextWithNewlines(t *testing.T) {
|
||
content := "Первая строка\nВторая строка\nТретья строка"
|
||
result, err := ExtractText([]byte(content), "document.txt")
|
||
|
||
assert.NoError(t, err)
|
||
assert.Equal(t, content, result)
|
||
}
|
||
|
||
func TestExtractText_RealDocxFile(t *testing.T) {
|
||
testdataPath := filepath.Join("testdata", "test_document.docx")
|
||
data, err := os.ReadFile(testdataPath)
|
||
require.NoError(t, err, "не удалось прочитать тестовый файл")
|
||
|
||
result, err := ExtractText(data, "тестовый.docx")
|
||
|
||
assert.NoError(t, err)
|
||
assert.NotEmpty(t, result, "текст из docx не должен быть пустым")
|
||
t.Logf("Извлеченный текст из docx:\n%s", result)
|
||
}
|
||
|
||
func TestExtractText_DocxWithAnyFilename(t *testing.T) {
|
||
testdataPath := filepath.Join("testdata", "test_document.docx")
|
||
data, err := os.ReadFile(testdataPath)
|
||
require.NoError(t, err)
|
||
|
||
result1, err := ExtractText(data, "random_name_without_extension")
|
||
assert.NoError(t, err)
|
||
assert.NotEmpty(t, result1)
|
||
|
||
result2, err := ExtractText(data, "document.pdf")
|
||
assert.NoError(t, err)
|
||
assert.NotEmpty(t, result2)
|
||
|
||
assert.Equal(t, result1, result2, "результат должен быть одинаковым независимо от имени файла")
|
||
}
|
||
|
||
func TestExtractText_UnsupportedFormat_PDF(t *testing.T) {
|
||
pdfHeader := []byte("%PDF-1.4\n")
|
||
result, err := ExtractText(pdfHeader, "document.pdf")
|
||
|
||
assert.Error(t, err)
|
||
assert.Empty(t, result)
|
||
assert.Contains(t, err.Error(), "UNSUPPORTED_FILE_FORMAT")
|
||
}
|
||
|
||
func TestExtractText_UnsupportedFormat_Image(t *testing.T) {
|
||
pngHeader := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}
|
||
result, err := ExtractText(pngHeader, "image.png")
|
||
|
||
assert.Error(t, err)
|
||
assert.Empty(t, result)
|
||
assert.Contains(t, err.Error(), "UNSUPPORTED_FILE_FORMAT")
|
||
}
|
||
|
||
func TestExtractText_InvalidDocx(t *testing.T) {
|
||
zipHeader := []byte{0x50, 0x4B, 0x03, 0x04}
|
||
fakeZip := append(zipHeader, []byte("not a valid zip content")...)
|
||
|
||
result, err := ExtractText(fakeZip, "fake.docx")
|
||
|
||
assert.Error(t, err)
|
||
assert.Empty(t, result)
|
||
}
|
||
|
||
func TestIsDocx_ValidDocx(t *testing.T) {
|
||
testdataPath := filepath.Join("testdata", "test_document.docx")
|
||
data, err := os.ReadFile(testdataPath)
|
||
require.NoError(t, err)
|
||
|
||
assert.True(t, isDocx(data))
|
||
}
|
||
|
||
func TestIsDocx_RegularZip(t *testing.T) {
|
||
zipHeader := []byte{0x50, 0x4B, 0x03, 0x04}
|
||
assert.False(t, isDocx(zipHeader))
|
||
}
|
||
|
||
func TestIsDocx_NotZip(t *testing.T) {
|
||
textData := []byte("plain text content")
|
||
assert.False(t, isDocx(textData))
|
||
}
|