From 7e7314448633cf4f3375ed2c9c8f544e3a11b5cc Mon Sep 17 00:00:00 2001 From: vallyenfail Date: Tue, 20 Jan 2026 14:26:27 +0300 Subject: [PATCH] add service --- internal/grpc/request_handler.go | 7 +- internal/mocks/request_service_mock.go | 90 +++++++++++++--- internal/service/interfaces.go | 2 +- internal/service/request.go | 26 ++++- pkg/errors/codes.go | 2 + pkg/fileparser/parser.go | 113 +++++++++++++++++++++ pkg/fileparser/parser_test.go | 110 ++++++++++++++++++++ pkg/fileparser/testdata/test_document.docx | Bin 0 -> 13165 bytes 8 files changed, 325 insertions(+), 25 deletions(-) create mode 100644 pkg/fileparser/parser.go create mode 100644 pkg/fileparser/parser_test.go create mode 100644 pkg/fileparser/testdata/test_document.docx diff --git a/internal/grpc/request_handler.go b/internal/grpc/request_handler.go index 287e48d..a3ea20a 100644 --- a/internal/grpc/request_handler.go +++ b/internal/grpc/request_handler.go @@ -11,12 +11,7 @@ import ( ) func (h *RequestHandler) CreateTZ(ctx context.Context, req *pb.CreateTZRequest) (*pb.CreateTZResponse, error) { - requestTxt := req.RequestTxt - if len(req.FileData) > 0 { - requestTxt += "\n[File: " + req.FileName + "]" - } - - requestID, tzText, err := h.requestService.CreateTZ(ctx, int(req.UserId), requestTxt) + requestID, tzText, err := h.requestService.CreateTZ(ctx, int(req.UserId), req.RequestTxt, req.FileData, req.FileName) if err != nil { return nil, errors.ToGRPCError(err, h.logger, "RequestService.CreateTZ") } diff --git a/internal/mocks/request_service_mock.go b/internal/mocks/request_service_mock.go index c5a4cdb..4414708 100644 --- a/internal/mocks/request_service_mock.go +++ b/internal/mocks/request_service_mock.go @@ -27,9 +27,9 @@ type RequestServiceMock struct { beforeApproveTZCounter uint64 ApproveTZMock mRequestServiceMockApproveTZ - funcCreateTZ func(ctx context.Context, userID int, requestTxt string) (u1 uuid.UUID, s1 string, err error) + funcCreateTZ func(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (u1 uuid.UUID, s1 string, err error) funcCreateTZOrigin string - inspectFuncCreateTZ func(ctx context.Context, userID int, requestTxt string) + inspectFuncCreateTZ func(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) afterCreateTZCounter uint64 beforeCreateTZCounter uint64 CreateTZMock mRequestServiceMockCreateTZ @@ -508,6 +508,8 @@ type RequestServiceMockCreateTZParams struct { ctx context.Context userID int requestTxt string + fileData []byte + fileName string } // RequestServiceMockCreateTZParamPtrs contains pointers to parameters of the RequestService.CreateTZ @@ -515,6 +517,8 @@ type RequestServiceMockCreateTZParamPtrs struct { ctx *context.Context userID *int requestTxt *string + fileData *[]byte + fileName *string } // RequestServiceMockCreateTZResults contains results of the RequestService.CreateTZ @@ -530,6 +534,8 @@ type RequestServiceMockCreateTZExpectationOrigins struct { originCtx string originUserID string originRequestTxt string + originFileData string + originFileName string } // Marks this method to be optional. The default behavior of any method with Return() is '1 or more', meaning @@ -543,7 +549,7 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) Optional() *mRequestServiceMockCr } // Expect sets up expected params for RequestService.CreateTZ -func (mmCreateTZ *mRequestServiceMockCreateTZ) Expect(ctx context.Context, userID int, requestTxt string) *mRequestServiceMockCreateTZ { +func (mmCreateTZ *mRequestServiceMockCreateTZ) Expect(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) *mRequestServiceMockCreateTZ { if mmCreateTZ.mock.funcCreateTZ != nil { mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Set") } @@ -556,7 +562,7 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) Expect(ctx context.Context, userI mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by ExpectParams functions") } - mmCreateTZ.defaultExpectation.params = &RequestServiceMockCreateTZParams{ctx, userID, requestTxt} + mmCreateTZ.defaultExpectation.params = &RequestServiceMockCreateTZParams{ctx, userID, requestTxt, fileData, fileName} mmCreateTZ.defaultExpectation.expectationOrigins.origin = minimock.CallerInfo(1) for _, e := range mmCreateTZ.expectations { if minimock.Equal(e.params, mmCreateTZ.defaultExpectation.params) { @@ -636,8 +642,54 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) ExpectRequestTxtParam3(requestTxt return mmCreateTZ } +// ExpectFileDataParam4 sets up expected param fileData for RequestService.CreateTZ +func (mmCreateTZ *mRequestServiceMockCreateTZ) ExpectFileDataParam4(fileData []byte) *mRequestServiceMockCreateTZ { + if mmCreateTZ.mock.funcCreateTZ != nil { + mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Set") + } + + if mmCreateTZ.defaultExpectation == nil { + mmCreateTZ.defaultExpectation = &RequestServiceMockCreateTZExpectation{} + } + + if mmCreateTZ.defaultExpectation.params != nil { + mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Expect") + } + + if mmCreateTZ.defaultExpectation.paramPtrs == nil { + mmCreateTZ.defaultExpectation.paramPtrs = &RequestServiceMockCreateTZParamPtrs{} + } + mmCreateTZ.defaultExpectation.paramPtrs.fileData = &fileData + mmCreateTZ.defaultExpectation.expectationOrigins.originFileData = minimock.CallerInfo(1) + + return mmCreateTZ +} + +// ExpectFileNameParam5 sets up expected param fileName for RequestService.CreateTZ +func (mmCreateTZ *mRequestServiceMockCreateTZ) ExpectFileNameParam5(fileName string) *mRequestServiceMockCreateTZ { + if mmCreateTZ.mock.funcCreateTZ != nil { + mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Set") + } + + if mmCreateTZ.defaultExpectation == nil { + mmCreateTZ.defaultExpectation = &RequestServiceMockCreateTZExpectation{} + } + + if mmCreateTZ.defaultExpectation.params != nil { + mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Expect") + } + + if mmCreateTZ.defaultExpectation.paramPtrs == nil { + mmCreateTZ.defaultExpectation.paramPtrs = &RequestServiceMockCreateTZParamPtrs{} + } + mmCreateTZ.defaultExpectation.paramPtrs.fileName = &fileName + mmCreateTZ.defaultExpectation.expectationOrigins.originFileName = minimock.CallerInfo(1) + + return mmCreateTZ +} + // Inspect accepts an inspector function that has same arguments as the RequestService.CreateTZ -func (mmCreateTZ *mRequestServiceMockCreateTZ) Inspect(f func(ctx context.Context, userID int, requestTxt string)) *mRequestServiceMockCreateTZ { +func (mmCreateTZ *mRequestServiceMockCreateTZ) Inspect(f func(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string)) *mRequestServiceMockCreateTZ { if mmCreateTZ.mock.inspectFuncCreateTZ != nil { mmCreateTZ.mock.t.Fatalf("Inspect function is already set for RequestServiceMock.CreateTZ") } @@ -662,7 +714,7 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) Return(u1 uuid.UUID, s1 string, e } // Set uses given function f to mock the RequestService.CreateTZ method -func (mmCreateTZ *mRequestServiceMockCreateTZ) Set(f func(ctx context.Context, userID int, requestTxt string) (u1 uuid.UUID, s1 string, err error)) *RequestServiceMock { +func (mmCreateTZ *mRequestServiceMockCreateTZ) Set(f func(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (u1 uuid.UUID, s1 string, err error)) *RequestServiceMock { if mmCreateTZ.defaultExpectation != nil { mmCreateTZ.mock.t.Fatalf("Default expectation is already set for the RequestService.CreateTZ method") } @@ -678,14 +730,14 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) Set(f func(ctx context.Context, u // When sets expectation for the RequestService.CreateTZ which will trigger the result defined by the following // Then helper -func (mmCreateTZ *mRequestServiceMockCreateTZ) When(ctx context.Context, userID int, requestTxt string) *RequestServiceMockCreateTZExpectation { +func (mmCreateTZ *mRequestServiceMockCreateTZ) When(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) *RequestServiceMockCreateTZExpectation { if mmCreateTZ.mock.funcCreateTZ != nil { mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Set") } expectation := &RequestServiceMockCreateTZExpectation{ mock: mmCreateTZ.mock, - params: &RequestServiceMockCreateTZParams{ctx, userID, requestTxt}, + params: &RequestServiceMockCreateTZParams{ctx, userID, requestTxt, fileData, fileName}, expectationOrigins: RequestServiceMockCreateTZExpectationOrigins{origin: minimock.CallerInfo(1)}, } mmCreateTZ.expectations = append(mmCreateTZ.expectations, expectation) @@ -720,17 +772,17 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) invocationsDone() bool { } // CreateTZ implements mm_service.RequestService -func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, requestTxt string) (u1 uuid.UUID, s1 string, err error) { +func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (u1 uuid.UUID, s1 string, err error) { mm_atomic.AddUint64(&mmCreateTZ.beforeCreateTZCounter, 1) defer mm_atomic.AddUint64(&mmCreateTZ.afterCreateTZCounter, 1) mmCreateTZ.t.Helper() if mmCreateTZ.inspectFuncCreateTZ != nil { - mmCreateTZ.inspectFuncCreateTZ(ctx, userID, requestTxt) + mmCreateTZ.inspectFuncCreateTZ(ctx, userID, requestTxt, fileData, fileName) } - mm_params := RequestServiceMockCreateTZParams{ctx, userID, requestTxt} + mm_params := RequestServiceMockCreateTZParams{ctx, userID, requestTxt, fileData, fileName} // Record call args mmCreateTZ.CreateTZMock.mutex.Lock() @@ -749,7 +801,7 @@ func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, mm_want := mmCreateTZ.CreateTZMock.defaultExpectation.params mm_want_ptrs := mmCreateTZ.CreateTZMock.defaultExpectation.paramPtrs - mm_got := RequestServiceMockCreateTZParams{ctx, userID, requestTxt} + mm_got := RequestServiceMockCreateTZParams{ctx, userID, requestTxt, fileData, fileName} if mm_want_ptrs != nil { @@ -768,6 +820,16 @@ func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, mmCreateTZ.CreateTZMock.defaultExpectation.expectationOrigins.originRequestTxt, *mm_want_ptrs.requestTxt, mm_got.requestTxt, minimock.Diff(*mm_want_ptrs.requestTxt, mm_got.requestTxt)) } + if mm_want_ptrs.fileData != nil && !minimock.Equal(*mm_want_ptrs.fileData, mm_got.fileData) { + mmCreateTZ.t.Errorf("RequestServiceMock.CreateTZ got unexpected parameter fileData, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmCreateTZ.CreateTZMock.defaultExpectation.expectationOrigins.originFileData, *mm_want_ptrs.fileData, mm_got.fileData, minimock.Diff(*mm_want_ptrs.fileData, mm_got.fileData)) + } + + if mm_want_ptrs.fileName != nil && !minimock.Equal(*mm_want_ptrs.fileName, mm_got.fileName) { + mmCreateTZ.t.Errorf("RequestServiceMock.CreateTZ got unexpected parameter fileName, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmCreateTZ.CreateTZMock.defaultExpectation.expectationOrigins.originFileName, *mm_want_ptrs.fileName, mm_got.fileName, minimock.Diff(*mm_want_ptrs.fileName, mm_got.fileName)) + } + } else if mm_want != nil && !minimock.Equal(*mm_want, mm_got) { mmCreateTZ.t.Errorf("RequestServiceMock.CreateTZ got unexpected parameters, expected at\n%s:\nwant: %#v\n got: %#v%s\n", mmCreateTZ.CreateTZMock.defaultExpectation.expectationOrigins.origin, *mm_want, mm_got, minimock.Diff(*mm_want, mm_got)) @@ -780,9 +842,9 @@ func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, return (*mm_results).u1, (*mm_results).s1, (*mm_results).err } if mmCreateTZ.funcCreateTZ != nil { - return mmCreateTZ.funcCreateTZ(ctx, userID, requestTxt) + return mmCreateTZ.funcCreateTZ(ctx, userID, requestTxt, fileData, fileName) } - mmCreateTZ.t.Fatalf("Unexpected call to RequestServiceMock.CreateTZ. %v %v %v", ctx, userID, requestTxt) + mmCreateTZ.t.Fatalf("Unexpected call to RequestServiceMock.CreateTZ. %v %v %v %v %v", ctx, userID, requestTxt, fileData, fileName) return } diff --git a/internal/service/interfaces.go b/internal/service/interfaces.go index c2ca84e..1634290 100644 --- a/internal/service/interfaces.go +++ b/internal/service/interfaces.go @@ -29,7 +29,7 @@ type InviteService interface { } type RequestService interface { - CreateTZ(ctx context.Context, userID int, requestTxt string) (uuid.UUID, string, error) + CreateTZ(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (uuid.UUID, string, error) ApproveTZ(ctx context.Context, requestID uuid.UUID, tzText string, userID int) ([]*model.Supplier, error) GetMailingList(ctx context.Context, userID int) ([]*model.Request, error) GetMailingListByID(ctx context.Context, requestID uuid.UUID, userID int) (*model.RequestDetail, error) diff --git a/internal/service/request.go b/internal/service/request.go index 6c2af6c..769f19d 100644 --- a/internal/service/request.go +++ b/internal/service/request.go @@ -2,12 +2,14 @@ package service import ( "context" + "fmt" "math" "git.techease.ru/Smart-search/smart-search-back/internal/ai" "git.techease.ru/Smart-search/smart-search-back/internal/model" "git.techease.ru/Smart-search/smart-search-back/internal/repository" "git.techease.ru/Smart-search/smart-search-back/pkg/errors" + "git.techease.ru/Smart-search/smart-search-back/pkg/fileparser" "github.com/google/uuid" "github.com/jackc/pgx/v5" ) @@ -42,21 +44,37 @@ func NewRequestService( } } -func (s *requestService) CreateTZ(ctx context.Context, userID int, requestTxt string) (uuid.UUID, string, error) { +func (s *requestService) CreateTZ(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (uuid.UUID, string, error) { + combinedText := requestTxt + + if len(fileData) > 0 && fileName != "" { + fileContent, err := fileparser.ExtractText(fileData, fileName) + if err != nil { + return uuid.Nil, "", err + } + if fileContent != "" { + if combinedText != "" { + combinedText = fmt.Sprintf("%s\n\nСодержимое файла (%s):\n%s", combinedText, fileName, fileContent) + } else { + combinedText = fmt.Sprintf("Содержимое файла (%s):\n%s", fileName, fileContent) + } + } + } + req := &model.Request{ UserID: userID, - RequestTxt: requestTxt, + RequestTxt: combinedText, } if err := s.requestRepo.Create(ctx, req); err != nil { return uuid.Nil, "", err } - if requestTxt == "" { + if combinedText == "" { return req.ID, "", nil } - tzText, err := s.openAI.GenerateTZ(requestTxt) + tzText, err := s.openAI.GenerateTZ(combinedText) if err != nil { if err := s.requestRepo.UpdateWithTZ(ctx, req.ID, "", false); err != nil { return req.ID, "", err diff --git a/pkg/errors/codes.go b/pkg/errors/codes.go index 648c609..e6017e3 100644 --- a/pkg/errors/codes.go +++ b/pkg/errors/codes.go @@ -12,6 +12,8 @@ const ( UserNotFound = "USER_NOT_FOUND" RequestNotFound = "REQUEST_NOT_FOUND" PermissionDenied = "PERMISSION_DENIED" + UnsupportedFileFormat = "UNSUPPORTED_FILE_FORMAT" + FileProcessingError = "FILE_PROCESSING_ERROR" DatabaseError = "DATABASE_ERROR" EncryptionError = "ENCRYPTION_ERROR" diff --git a/pkg/fileparser/parser.go b/pkg/fileparser/parser.go new file mode 100644 index 0000000..3d11386 --- /dev/null +++ b/pkg/fileparser/parser.go @@ -0,0 +1,113 @@ +package fileparser + +import ( + "archive/zip" + "bytes" + "encoding/xml" + "io" + "net/http" + "strings" + + "git.techease.ru/Smart-search/smart-search-back/pkg/errors" +) + +func ExtractText(data []byte, _ string) (string, error) { + if len(data) == 0 { + return "", nil + } + + mimeType := http.DetectContentType(data) + + switch { + case strings.HasPrefix(mimeType, "text/"): + return string(data), nil + case mimeType == "application/zip" || mimeType == "application/octet-stream": + if isDocx(data) { + return extractDocx(data) + } + return "", errors.NewBusinessError(errors.UnsupportedFileFormat, "поддерживаются только текстовые файлы (.txt) и документы Word (.docx)") + default: + return "", errors.NewBusinessError(errors.UnsupportedFileFormat, "неподдерживаемый формат файла: "+mimeType+", поддерживаются .txt и .docx") + } +} + +func isDocx(data []byte) bool { + reader, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return false + } + + for _, file := range reader.File { + if file.Name == "word/document.xml" { + return true + } + } + return false +} + +func extractDocx(data []byte) (string, error) { + reader, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return "", errors.NewInternalError(errors.FileProcessingError, "не удалось прочитать docx файл", err) + } + + var content string + for _, file := range reader.File { + if file.Name == "word/document.xml" { + rc, err := file.Open() + if err != nil { + return "", errors.NewInternalError(errors.FileProcessingError, "не удалось открыть содержимое документа", err) + } + defer func() { _ = rc.Close() }() + + xmlData, err := io.ReadAll(rc) + if err != nil { + return "", errors.NewInternalError(errors.FileProcessingError, "не удалось прочитать содержимое документа", err) + } + + content = extractTextFromXML(xmlData) + break + } + } + + return content, nil +} + +type docxDocument struct { + XMLName xml.Name `xml:"document"` + Body docxBody `xml:"body"` +} + +type docxBody struct { + Paragraphs []docxParagraph `xml:"p"` +} + +type docxParagraph struct { + Runs []docxRun `xml:"r"` +} + +type docxRun struct { + Text string `xml:"t"` +} + +func extractTextFromXML(data []byte) string { + var doc docxDocument + if err := xml.Unmarshal(data, &doc); err != nil { + return "" + } + + var result []string + for _, p := range doc.Body.Paragraphs { + var line []string + for _, r := range p.Runs { + if r.Text != "" { + line = append(line, r.Text) + } + } + if len(line) > 0 { + result = append(result, strings.Join(line, "")) + } + } + + return strings.Join(result, "\n") +} diff --git a/pkg/fileparser/parser_test.go b/pkg/fileparser/parser_test.go new file mode 100644 index 0000000..07650ca --- /dev/null +++ b/pkg/fileparser/parser_test.go @@ -0,0 +1,110 @@ +package fileparser + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestExtractText_EmptyData(t *testing.T) { + result, err := ExtractText(nil, "test.txt") + assert.NoError(t, err) + assert.Empty(t, result) + + result, err = ExtractText([]byte{}, "test.txt") + assert.NoError(t, err) + assert.Empty(t, result) +} + +func TestExtractText_PlainText(t *testing.T) { + content := "Тестовый текст для проверки" + result, err := ExtractText([]byte(content), "document.txt") + + assert.NoError(t, err) + assert.Equal(t, content, result) +} + +func TestExtractText_PlainTextWithNewlines(t *testing.T) { + content := "Первая строка\nВторая строка\nТретья строка" + result, err := ExtractText([]byte(content), "document.txt") + + assert.NoError(t, err) + assert.Equal(t, content, result) +} + +func TestExtractText_RealDocxFile(t *testing.T) { + testdataPath := filepath.Join("testdata", "test_document.docx") + data, err := os.ReadFile(testdataPath) + require.NoError(t, err, "не удалось прочитать тестовый файл") + + result, err := ExtractText(data, "тестовый.docx") + + assert.NoError(t, err) + assert.NotEmpty(t, result, "текст из docx не должен быть пустым") + t.Logf("Извлеченный текст из docx:\n%s", result) +} + +func TestExtractText_DocxWithAnyFilename(t *testing.T) { + testdataPath := filepath.Join("testdata", "test_document.docx") + data, err := os.ReadFile(testdataPath) + require.NoError(t, err) + + result1, err := ExtractText(data, "random_name_without_extension") + assert.NoError(t, err) + assert.NotEmpty(t, result1) + + result2, err := ExtractText(data, "document.pdf") + assert.NoError(t, err) + assert.NotEmpty(t, result2) + + assert.Equal(t, result1, result2, "результат должен быть одинаковым независимо от имени файла") +} + +func TestExtractText_UnsupportedFormat_PDF(t *testing.T) { + pdfHeader := []byte("%PDF-1.4\n") + result, err := ExtractText(pdfHeader, "document.pdf") + + assert.Error(t, err) + assert.Empty(t, result) + assert.Contains(t, err.Error(), "UNSUPPORTED_FILE_FORMAT") +} + +func TestExtractText_UnsupportedFormat_Image(t *testing.T) { + pngHeader := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A} + result, err := ExtractText(pngHeader, "image.png") + + assert.Error(t, err) + assert.Empty(t, result) + assert.Contains(t, err.Error(), "UNSUPPORTED_FILE_FORMAT") +} + +func TestExtractText_InvalidDocx(t *testing.T) { + zipHeader := []byte{0x50, 0x4B, 0x03, 0x04} + fakeZip := append(zipHeader, []byte("not a valid zip content")...) + + result, err := ExtractText(fakeZip, "fake.docx") + + assert.Error(t, err) + assert.Empty(t, result) +} + +func TestIsDocx_ValidDocx(t *testing.T) { + testdataPath := filepath.Join("testdata", "test_document.docx") + data, err := os.ReadFile(testdataPath) + require.NoError(t, err) + + assert.True(t, isDocx(data)) +} + +func TestIsDocx_RegularZip(t *testing.T) { + zipHeader := []byte{0x50, 0x4B, 0x03, 0x04} + assert.False(t, isDocx(zipHeader)) +} + +func TestIsDocx_NotZip(t *testing.T) { + textData := []byte("plain text content") + assert.False(t, isDocx(textData)) +} diff --git a/pkg/fileparser/testdata/test_document.docx b/pkg/fileparser/testdata/test_document.docx new file mode 100644 index 0000000000000000000000000000000000000000..86bc44b5b2dfcb2844d1f272a2614ea8274db246 GIT binary patch literal 13165 zcmeHOg?1cCmMt+ev&GEJ%*@OTC1z%enVDG@qa|5j3oN#nEsMco##Q%pZ})im><{eZ zIh9qJ5%*?9)C+hwUMk6gfujQ;0nh*dfEch*^36^M1ORvk0RW%@ph0y-9qe7r?OhGk zyd2G4^cg&XcJGS7L8%G=pl|vAd;C9cf##G^`+g>5@!Qk~q{KFLll`J98qjEgGE$dt7?c(d@*^z%w4h`IYfjpwg>Cu( zGaQP|6sC3I5spARwytg*ISvpe(w2dwaaCBwoZ_4i46Qpr98GGx2AOv(D`DspChPax z?sc&Ab#g~)#TF<$M1ulUxa7tKc0YS55^S?2ml-X~RJl;2X7tEI--Frw6yJ2E1m~3y z7lN}mTx`J%CCxafV^<nxsCP0Va5% z(~#$o>2BraKI~!zM$@Sk>?J5@U1>SAl|`F{muJ4kMNs_%W69BPGs#olE;$lz-&6F@ zaMRQgqPk(`?+pfYAGZ!Dg_l1fv54T;vqh?>NOOuUQ881F^f zp%J^;DTlKrp`M&IC2K0qb5dWp&G2U}$Yh?Iw~^BIfvVyc?+GX9oWSviwGeDbXQfYw z?_Y1!|EzYH+|CkfLw!w$YsbV$o|9-&1B-jsHGSw^{Hgp)WEdSxAxtFWqpyV?s?N(^ z)Gm%7$0@OPv*D%#UXH%-<=e#cpZ#OrtV#LmtudJ2dJjGT8pOlF*^KdDoy5$+)D8GH zd;K|?j|gb66OD8Y z4D9lzY!qZ?uY<&jxKyT?-?yZg@!+$W3lzvQ#>R*2vJGlL0u9`vom2NbSQK!S@|eDS z&_MRMW{A`22k;VnqS(`?sace_D;- zy#pdD6adgg2mqkG8N=^Z^LKmc(q6J(kwEujoC*u#EmK6r+yEDu1S*Kmlyw7U z6{Ul$OeW=hq>AbR_k#kM)l*{K)VePUS0RmF?QZu9dJp%Ul*c=3Aqz9;ECTj}Wq`Zu zsKaAlHs5XuXgqN7ls2O+GZL*LQ_oYTYU_q8TgfO;$FQv$OoW}AHSNwbuVS-up_)$@ zaMxEVU%Q)e)H(+PF68P9oMBH_8Gq-YTGu8gyXxAN zm62u^so#-js?!sHdmv#saHa~zpB^RhYZHH@M1dL_gW(}b-5gt!!ZG2NvKMSJ<|8&K_c=H^o^LyLhgH=e?K~yGT823oB_w(a+f$_c9JK zr!iaU!brb2}0P&aKW4ciWqDddRp$e*ns;-M)wxKln$pvM{VrCU?P z(7K=wBfsN^NMNN`=qEDShQco5o>1zyb}Ac0ro4gOY#t0x<@P#mW&gwz)_^b2;1WS)&{f+XSU#>63iL z3|jdCh?S+X)^Gb#!nsUkY>y>U z`$uz+h^t=>(ACx-XxvtQoQL@`ik$ym5}zVL#=A60V+UMLNav~_ODoiC2eb>OdpC`YEG#;lwk&;6$p0p5nG61YSd$VCsqH6Ca}yZbD9qu+yO{UfnS>NVI~wO`u7NPdvsWFMy&t(5bW zi3VE)2|bz-UZ_^@_D&TGYm2=OT$4Fpkt_dc>Z&ApfWx8ZGB!%+r!co%8`rW7D+ir9(jfjr3FwX! zpo%f{>Td8t^iLD0l6UOI10(?O^bP>P_;UhvwK4~qGyZ;N{%z59tgRc3$BFLGcqIsb zd2+|GB^!Zu+>pCskJ^}>!e;~Yl4!6nBlF@c0~fiNO-~cid6*JfNf$`@7FNcV68;kQ z(*c_7>Vnj}q=BDQ*FMPttne6jzSDPae}-F9a){M(VlA?=DZb+!opr~KD)~a+NFahV zYU?WA$7hiNw@|SWWaYH)X}kmd{2n*Yw-W_3Ed9*xLINn|%A>I*IXMiNUjX5WTwf@- zf;v#Pn3JYE|pIxtxrVCx%*hs*R^Yr&mmG9D} z3Lzm=PN{x+5?@`P-o_)w=MnM?@H{;eHs|->QtG&KF;p`$qk-&92o%v{r4crU==(}` zB8rRji;dDB2)23N$T?Aq?LaQ&=o!9_@9csOIEjy*J33JBd-Z7AGG5yPQjgjVx;2mH z(jM3V2$1!cDbbZp1~6UR98faZm}_g^Z8{uh+|P6DzAKtUV0FUQGr9w}kHQuM_*B3L zThC83$FB0vxG5aAws+o@YuAU61|cLeN$0qo+Y42FCT+6A5z}xQo-?-(*I)7T3Zpu# zWOvZ+s~WJ1S7L63g?RC#lenBY`bgOT9RTIVp~xojerQmqU{ngDb~Wm2S@pX*fjoO_ zq~aOuosyZtAT2zVCw%7bW+b{NUn;r!Pea^lwJrnKP){W{5NoS-# z9<3<8`nyivUk=s1c;96bFkakYZ3JBXNZ)vo?`E^|^S}FXzIn+g@Ur#sJ*H=NaW|iD z`e!XJ&9fkv+%NUQqoAJgpYlH2he{a~<%WWiOO0qv@#Em^)3S;sLM-%^5jZ$Y5pjIK zKLCcDDSR$Z(h5v3UIf^)O7O<$-nwenrndF7!TgNAv?lZ*DD+!d?u$zInSsdxGf~A6 zTP*O34IWjFK%*mI9~#8g{cMk7vgno#&xK}3maF^u;GGHk2*VO<5j9$;kjdHTm;~|@ zaM9wiY{q!DtS9%12=GHD?_e z&N#W_t0m;xXoAJD{YF{SqL{PvAjdYQ30?egqZ4wWr^$|75>v$wP56{dL^iTR2 zY91lcm|Q$8peF~jU0z}jy0At^Ah5L~bWBVA8656--(={A7D-IPS|9OBucc3GlBWe# zB+5p~{nthKOhP@k26lr3BfAon{<*@InUT3z8t!XH^t+;W;1XK6TDkbt}4KV+uSlDA7SreuC@ zed=C@qKVlv6lgSxFJEr}dlMd)Qsgv_vgO<7swj+nrPiN|%A(6g*x_#@AiPy(a%S0H zAG0iGyaX!E6lgM&Wd<28FljPtJg4JBF_HY6)9zC9L1Upyd7UNB)27T50`-`V2MSKC z$VcBRB|~Rj?jYz5C))Goj37nCxRbC2nU8vhDUGSW@6Tic2}>btJ=8#R#dT;h=Fwqb z%<~BbBrS2Fn?yTNt(fuXR9hn5r5(C4&LUZ0fu|fFB@EMBBvt50&0>Wx5*K>7)6r#N zVj`6Klfpf--@@u?AgU-%`!Sk>Z$c3ZsQJt6V6F4JD>|^0b^MGn_mTf{?a2pB7!J#p zCcz3v(W9sy7&<<(*i{M1WBI}p>%9fM(;-hyT@^xW+QaY7tbQo42&AC*02eGQz)=*+ zSIUH&X0q-8{r0 zJ#uKp>bMEh9m=ig`k+kQ*o$s^6nf{XP_wDuVGJnOdzxojfbw?^GkBD+?v(8KYhA1u zD4mChRBqnF8qCpR;QD{mv3m4xS-l)BwJ&$ZiXka(KQ6bu-5e6k9XGr_I&WEcJ}`-*O{X0^dby(i)KtBz|31)Mpho>++@?vDmaY1yf8UWiZkUDA5qf7! z!TL`Ruus#+8*aIN2sg`fz17#aVLIHWfZaA!PAo~;5zxGXC>OI%jaOvb0 zwN{ghcOg1O@vD-UqY#S+l^nMZyxbB{Pdj54OX+NXDLVQ1c;d;oK5{C^=ghH;Qdv>4 zXK=@GCQ-rwUUuz%;@3Sn$m&qSR>f>mMB#EOlybXyxL~+`sw&~~iDPO2+UVys(Z-dd4)w>d#1 zQUxzrErpz3>eUhRg4~}sIh9IB!3N0xki--rJX=*1>umQVxdf=eA35>KUUb^Mx-mWr z{&^DiSu#33)Dj+-8ny@>AK8*YNCzoy%Ti2osb146(WaP;^B#13>627C%y0S zsc(|14hnR$WXV4AE~UdWe#-{ZlYPYr1uJC@o*`P=A7dn&<~vT})9Dxq@qpm}k&W!z ztcXVf*G@HMlvKHg2l%R*f)~rCR8mD8vlOSw6x_QduqN}lP?IaNrLRPce&&{H!)iM4 z8)DxeL$Ed!7DiIVl$brP8em0bRtI+Xs{T%OpZtmdW}&F9Uf)U!*ck_j$^N5$j#A%- zo{2fJC8_60E+*)j@a9qAR7wac4L#W>`A4h(QHhZeda5kIdc%02 zu^ScRY1tsgmyRAgZvp)%L7I*Pr^CxfIdw2h-DvFs6*ojsz-2-gGTA`oKu*51GaZPsCwe(c2pX-F)y4Hc(!+CBO&r)5~K_F-u zH(*IRyPTN+QB9P33DY^+PFhEQDekoAhJ18;Fa=M$xa^>cVnk=Gd3ag(Gb3IctqifLV*LvtReYJh~1&tjBpy4wuu?W6*Y|L76?Ov zF0L0fHkwax9n@21g=}z@lBmX-T+jt-ZUGUCO|$F>sT^H!Ww_$^aFe+2F68hey4&3x zqEOR^l%0^QU&mD;Syvy^HU*hm$zMKcjn&XnrEKl?UK)o!&qGX91iR1lHqVIg9D78E zIYo&(nL;|RE|{=L7cm_TDsr``s=UqRd;XGNAymL`Ggx!o%U;2gcWgMK2b|C;s-1ir zB8Hm2vtTn8?R+WsO#Pg^v%T?>)ENM#`TCEtDDz|zcH~>i-P^VS%9||urfn%XJ2<*9 znmRa}|FMtvUoG^_aq&&+Q`llc7CNOpM@l@c{8_Iin*s?!tDyEyxj7=cf$~ejVj4f& z&y6iIV5e}zO3dE)jOXzWla<*KNZ+Y>I_P}nq`*WC>n4MQp*i`7_hcYp%0;b^m}XEx zdMT8jU=N(F`B#08dW*vo#FAk;T;VRF_MK%ye8VI{waMh@H0C)$O%E4lVeT2`g;)o7 zn?cZJ3JP+W+=>&AQ)S|0Za0q%3dq86ahb5G@}0cRP<+vPfcvzsiLryZAr8}`P9<%^ z@kT4HO*hSwTTeLx^bSN1)n&`BACrW>ij3g1&nsbGVGmOIc69}P-t?3+q2q^c?&-q9 zt4JQD$NqRwC|am{&wH$Mq#p4zna%_@_-fhMcUV|}JHnM7lwoUB19x`rgPzR+upWcL z<84QbEFQ$S?itj*n~QO9<@EcD>(x zOw>R9ON+PNXf-_!c2N)DD+kyX8QlO6^(+NYu*=S!06d zm6OorHxynnU8bC#s)IQAv~{0xk3&d^R26yCAI!!>ITD&AWn~XrS)KFbxxPv-1s-=0 z%q|g~gAeiccRqR`tu?YO3y0q0rbX?QxPQvZ@x%vyFmfYD^t zfmf8Y-ym5NZ9>v2E>mf?5v!;;NZ0`k0`J@tq}dkW0K;hFi!r__Gn2_!*Y>apzNP=} z@Q>ijRucw*QhO1hxU^!=pg=me25SXlq=}yL5{`%3I_Z~mLmD963WrpG9pUEtXwb#F z#ZRBYYPrDI+JrO)>X6-J$I&f5Xp^>SgbR%}gCrW+O=jg7A>m~yZ^+76x*lqxK8AEz zV1~XA4LNdSOqUzk^;9WL2KW`E#w}3#P1ang-x4QaK)TnLa(iVdX9)l~N;FZ)wtUN* z9u-59Oq8Fmjg`tQ!@R?w6g=g;5s`8(aeOUTc^!8?H>rUZITS_r;C;D5kW}H3q-41o z;AQRJlzJ^ok6Y{|*n#eL22XtyzcIweu^@e*sa{^oK^DXc$W9P^JP(IC_xJTZ7!u4* zFb1nOd^3pu)bvkbsTCC7hCKhbH5mGzCgWjlqWZUmR?du}?c1(3;Ow@qM|9+4*;jOG z4P*ErgF&MesEC8PaPp3%=<1W1)8$H+JCoa_*V`=y;>(?hrEN4&oU0CO1l7dA1ug7P zBVQg@Y?#ns#CPD#r$rfVgiJcxyE8ep!HzT|5*1bYqsqq)^wgN=GgC@v9izgvLiWZ{ zLsS#XjY)H1iB`M|rsrs~S{f)$cDUrrfAoghqSMJ1Z-QLU2gU1m!5e9dDW+B)RIo2t zA%qtpRs=;HIo%Aoe7!?s?fe*+GYtYW%C&8Rh~tL-KK`5i_Aw*V0aOfQU}d{<{prL}H_Uv-w z=eoSJ0dj)XR=t1~H_=76BOr`P9}(!0RnC1dM)+eOf^fpm#zxHS^gYSEVZl!p=LrAQ z{^WMf4FPZfz>zZmfb?hkySRGUng71V?aI|v-QqzV+Gw-+6hf=`5lId@ok^5n7oBN9 zgnn#O&S+1&Ayq6IXzsV!nD0-%B#pl8yz0!zn4tSgc2pOj`&0H}<+M&3pYM_mi-jLA zM&KtBXkIuA>cNxU*_~s{trzk(ge)oMXi7f9x~IV1^O51}#mcC)LDv^PB=mRVipgho z^;A(i2aXx<;qvOAlJzdsy1A3J`(dxyvXRiz_)|x_l_+F2A}S-q&pG%OH;9mICaL@*_H z$@)&Hbqw-5gdwR1fnh%yWkWkNok?R@tKGZGyKTq;)*X_D> z!-Uj*g-eA0bdE#2MY5pS^jPDsBgp#t!AxhnZB^%hRXBaZAj61VluQI=w?eyKc-T*B)cp0s;(ea$OnF*OPyp|!1 zW%AcQLMeafh@mLfB9+0RoEyUB3GDJMcfIVTN)uee71F6*-<_G+SqSGyZWxO8%tlIe zXS39fAQ2!RDW;puw8Okk!g5rlT16Q;mNhvsdB-!YNfINvirE8!tWCacKX+9t!Vv|l z3X9`y_V0>uaRbyuDT(H$_Tp`Zjb8E*BH2_TH+%^>z$!j+A#xURBQ_OsBQm~ystdRg zDGRvcBI9d$_-~>Vqn2{y5^K8aH+-M)mtQW4jqKU_;rk#M8Jh- zi#6|@Z94@m8?lPE?;U+Ib>FI9BR5WTIm-E2Z3LOLF@K@@bJr9F3V1S)u`$J?hdfK; zlOyfLi)krwpE%A?PQ-PHMHG(4>vFHrLpmhn%qJG6jF+&WQk*&DIkuw2Dr+$?jhTqS z+N^xFt>1r?p~ULS9|_tc6-CHLwP_L*pA4Nam{uy%T%8Pwjj5Jl0-ilMubF4Q=b78( zHSCZ~NtR|e+;Et#bRLyprUt2`!Rn=lzF9is-S*h*nTt*zX?4sbIjflU z$TL2A*bN*yRNrbxHv6zA6{I)7=UaU{yIx7MtfKQdllzStik?x*gWCzOpw*LI1DNT zF&HjNfWc`JK)q`-^nLKiLd>fs;qoJ0UUn%O5QTK7hIVJckd_c5YZ*hYe7E!-cU|y) zMXiypsBq1)s}r>1yVz$K*|fQ$kX~IH#-55N(Ry5#u7C!){Vfr-GVgam+V2rAogW+s zZxr1G&4irI_Tw9whKR;h7Q|*Y9~jYE9iP*oCFEOtrY_TsOxqlvIo;tTTN=`tM_o4g z!Vbramsc_3y?q|B0@xJ?S+Y}^*8m>^%}CoplMo7HM#;vh+ZNBDITz0=+R`1rViWce za}PX9QjoAP!^g zd07~#U5@ou>}Ccjk2eiCfaH(Ql4RIpnG#p-=7g23OqW0?rm>hV;h|jkjP1pI2Uv@L zoZOb%z$KYEB(VoA5s)B2V`!ztPaXbrmr`S!CBDQMOj<-2S4zgy+PbuvG5phZ!96UK zm)qN8Q^11QaG2{yrfK$SLzO|ETAO*-Y0rT~`9NNqNe@-2hG#$e2?hr|cio7(y{ z1diByPd7nX)kT7MvCJACScfufB6lPXR&UxnD~~e^@&*jARc6#y?sb9=ZE^?kJlB{| zp#KTBwQncJ((Zc)Iv<-HXiDIvrT!K-#fJ0>oemTy#|lT{e4@PSSqTsms7=m=B!NyVGCsLy}W-sk6~u+F-f*HjGba=@I$h(BHGoxd2b$Z&_$c3SwbVtcpdz zmn$r~7Y8bs2rbpHPu~#d%zsZq4&tB@`zdIJ7}zbp7+7ini7@p4E!)2!zg?QEA)lE4Z|Ngi z@3r%bT=gvk`-!ey4H`C{XVup#^KWWbIS%W2ZRTh>s~@|eu~Y;gVmS#3v%HJ(#NbAD_Zz8wtSkWTKIqF)pf^GuZc1&r{ zxTZ$#{jQvboN(*v7$HJi53EoWRzq8@VOn%bO|4=+6m`Jp&$*y=e%+@ixN+e*y z3R|NRtSlU_S9!#nlkb&5fH6=3i}^H*yhG0}b}x;9Myg>W)hqKeKW7b`u@Yp^&?e#{ z4Y%m2B>W>w&1^zoDlTfh%IzvKFY}9vXiD1~j*?w()wnbq{EyVG5=iWTO5(4&Z@^?+ zwGpdIVh~z(GNZQ~f8$#&;gD2rEs>zM3){jp)_7>IfzJ-hTz2Q|(&{tr6NGO^WZI;Df^ zvb8E(Rpf26?KYLtHhL|yWGw;C?r2>lf25RZRKAhw{+{v%vq}1$X3gdO@#j0)KPl|r z@~9;LMe%>~|H;Dr=fi)0#J5~n6mUZjc4;jQaMukikMRrfzJ~zrP1CG%y}?O7SS!wJ z%$C%F^xFJS-|Q||d=D?fa9N)b_UC`fC|@JgvrqY+NhD_6L(r~87QE2cKfJG&GdyB# zQ8%T4Y(-^IV>NzmQU9QEBg`7-P{KZLZ?SwhbG-~X4C?Xci7Xi0&<3o`I?LQU}~-`oY><=XG@`Woazp@0>(jZ2}-hVuHA?KRcwgWx~< zN)#M&V;*n5(&(Fujr@0c$k@^Gzv|Fill^hzsEjJSMap%No+Bxr)T)YA2e(R8VEpJU zR#D~By`gAuv>WB9c=q8y2z_V2-OT!tH){#o9rfJyAE-1(Y#vW`gs3`UHb0>Hx-`39q+T)972IEb$a-Cs3>AK!$4g8Qbj!A=(z6Ld zpi5zZQ9Z=GhnNNntgT?az^U&E)WkyHl+8Mn)#4<9&fE|7<^BlH(eHTFJ*`64NgpuF z(!O7n!k-Usxh7D54y_bH@b$J}CK0Q55Sg%<91PCLPk17@BIIm5y&I%Y0XD=oYLeEN z1oV;E=%)@N-AF?rNq1ZOZRv_@`kl_be4I$n;p}l&g4w2x0MQVEn~@|P&4r*AHgtQr zA*x)t;>T8rYyQi&X@axTfNbB^49u%shMRHalg;Tf#kXevmtzkCO8@4>|MPnZfBrdt ze*eQ;3QDs7PVnzh)PDkh`%ZpS3;q^U{VVX_!%P1TYPd6w}^c>K;PRKo5u9p)&Br1rHR-8 literal 0 HcmV?d00001