diff --git a/internal/grpc/request_handler.go b/internal/grpc/request_handler.go index 287e48d..a3ea20a 100644 --- a/internal/grpc/request_handler.go +++ b/internal/grpc/request_handler.go @@ -11,12 +11,7 @@ import ( ) func (h *RequestHandler) CreateTZ(ctx context.Context, req *pb.CreateTZRequest) (*pb.CreateTZResponse, error) { - requestTxt := req.RequestTxt - if len(req.FileData) > 0 { - requestTxt += "\n[File: " + req.FileName + "]" - } - - requestID, tzText, err := h.requestService.CreateTZ(ctx, int(req.UserId), requestTxt) + requestID, tzText, err := h.requestService.CreateTZ(ctx, int(req.UserId), req.RequestTxt, req.FileData, req.FileName) if err != nil { return nil, errors.ToGRPCError(err, h.logger, "RequestService.CreateTZ") } diff --git a/internal/mocks/request_service_mock.go b/internal/mocks/request_service_mock.go index c5a4cdb..4414708 100644 --- a/internal/mocks/request_service_mock.go +++ b/internal/mocks/request_service_mock.go @@ -27,9 +27,9 @@ type RequestServiceMock struct { beforeApproveTZCounter uint64 ApproveTZMock mRequestServiceMockApproveTZ - funcCreateTZ func(ctx context.Context, userID int, requestTxt string) (u1 uuid.UUID, s1 string, err error) + funcCreateTZ func(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (u1 uuid.UUID, s1 string, err error) funcCreateTZOrigin string - inspectFuncCreateTZ func(ctx context.Context, userID int, requestTxt string) + inspectFuncCreateTZ func(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) afterCreateTZCounter uint64 beforeCreateTZCounter uint64 CreateTZMock mRequestServiceMockCreateTZ @@ -508,6 +508,8 @@ type RequestServiceMockCreateTZParams struct { ctx context.Context userID int requestTxt string + fileData []byte + fileName string } // RequestServiceMockCreateTZParamPtrs contains pointers to parameters of the RequestService.CreateTZ @@ -515,6 +517,8 @@ type RequestServiceMockCreateTZParamPtrs struct { ctx *context.Context userID *int requestTxt *string + fileData *[]byte + fileName *string } // RequestServiceMockCreateTZResults contains results of the RequestService.CreateTZ @@ -530,6 +534,8 @@ type RequestServiceMockCreateTZExpectationOrigins struct { originCtx string originUserID string originRequestTxt string + originFileData string + originFileName string } // Marks this method to be optional. The default behavior of any method with Return() is '1 or more', meaning @@ -543,7 +549,7 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) Optional() *mRequestServiceMockCr } // Expect sets up expected params for RequestService.CreateTZ -func (mmCreateTZ *mRequestServiceMockCreateTZ) Expect(ctx context.Context, userID int, requestTxt string) *mRequestServiceMockCreateTZ { +func (mmCreateTZ *mRequestServiceMockCreateTZ) Expect(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) *mRequestServiceMockCreateTZ { if mmCreateTZ.mock.funcCreateTZ != nil { mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Set") } @@ -556,7 +562,7 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) Expect(ctx context.Context, userI mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by ExpectParams functions") } - mmCreateTZ.defaultExpectation.params = &RequestServiceMockCreateTZParams{ctx, userID, requestTxt} + mmCreateTZ.defaultExpectation.params = &RequestServiceMockCreateTZParams{ctx, userID, requestTxt, fileData, fileName} mmCreateTZ.defaultExpectation.expectationOrigins.origin = minimock.CallerInfo(1) for _, e := range mmCreateTZ.expectations { if minimock.Equal(e.params, mmCreateTZ.defaultExpectation.params) { @@ -636,8 +642,54 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) ExpectRequestTxtParam3(requestTxt return mmCreateTZ } +// ExpectFileDataParam4 sets up expected param fileData for RequestService.CreateTZ +func (mmCreateTZ *mRequestServiceMockCreateTZ) ExpectFileDataParam4(fileData []byte) *mRequestServiceMockCreateTZ { + if mmCreateTZ.mock.funcCreateTZ != nil { + mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Set") + } + + if mmCreateTZ.defaultExpectation == nil { + mmCreateTZ.defaultExpectation = &RequestServiceMockCreateTZExpectation{} + } + + if mmCreateTZ.defaultExpectation.params != nil { + mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Expect") + } + + if mmCreateTZ.defaultExpectation.paramPtrs == nil { + mmCreateTZ.defaultExpectation.paramPtrs = &RequestServiceMockCreateTZParamPtrs{} + } + mmCreateTZ.defaultExpectation.paramPtrs.fileData = &fileData + mmCreateTZ.defaultExpectation.expectationOrigins.originFileData = minimock.CallerInfo(1) + + return mmCreateTZ +} + +// ExpectFileNameParam5 sets up expected param fileName for RequestService.CreateTZ +func (mmCreateTZ *mRequestServiceMockCreateTZ) ExpectFileNameParam5(fileName string) *mRequestServiceMockCreateTZ { + if mmCreateTZ.mock.funcCreateTZ != nil { + mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Set") + } + + if mmCreateTZ.defaultExpectation == nil { + mmCreateTZ.defaultExpectation = &RequestServiceMockCreateTZExpectation{} + } + + if mmCreateTZ.defaultExpectation.params != nil { + mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Expect") + } + + if mmCreateTZ.defaultExpectation.paramPtrs == nil { + mmCreateTZ.defaultExpectation.paramPtrs = &RequestServiceMockCreateTZParamPtrs{} + } + mmCreateTZ.defaultExpectation.paramPtrs.fileName = &fileName + mmCreateTZ.defaultExpectation.expectationOrigins.originFileName = minimock.CallerInfo(1) + + return mmCreateTZ +} + // Inspect accepts an inspector function that has same arguments as the RequestService.CreateTZ -func (mmCreateTZ *mRequestServiceMockCreateTZ) Inspect(f func(ctx context.Context, userID int, requestTxt string)) *mRequestServiceMockCreateTZ { +func (mmCreateTZ *mRequestServiceMockCreateTZ) Inspect(f func(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string)) *mRequestServiceMockCreateTZ { if mmCreateTZ.mock.inspectFuncCreateTZ != nil { mmCreateTZ.mock.t.Fatalf("Inspect function is already set for RequestServiceMock.CreateTZ") } @@ -662,7 +714,7 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) Return(u1 uuid.UUID, s1 string, e } // Set uses given function f to mock the RequestService.CreateTZ method -func (mmCreateTZ *mRequestServiceMockCreateTZ) Set(f func(ctx context.Context, userID int, requestTxt string) (u1 uuid.UUID, s1 string, err error)) *RequestServiceMock { +func (mmCreateTZ *mRequestServiceMockCreateTZ) Set(f func(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (u1 uuid.UUID, s1 string, err error)) *RequestServiceMock { if mmCreateTZ.defaultExpectation != nil { mmCreateTZ.mock.t.Fatalf("Default expectation is already set for the RequestService.CreateTZ method") } @@ -678,14 +730,14 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) Set(f func(ctx context.Context, u // When sets expectation for the RequestService.CreateTZ which will trigger the result defined by the following // Then helper -func (mmCreateTZ *mRequestServiceMockCreateTZ) When(ctx context.Context, userID int, requestTxt string) *RequestServiceMockCreateTZExpectation { +func (mmCreateTZ *mRequestServiceMockCreateTZ) When(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) *RequestServiceMockCreateTZExpectation { if mmCreateTZ.mock.funcCreateTZ != nil { mmCreateTZ.mock.t.Fatalf("RequestServiceMock.CreateTZ mock is already set by Set") } expectation := &RequestServiceMockCreateTZExpectation{ mock: mmCreateTZ.mock, - params: &RequestServiceMockCreateTZParams{ctx, userID, requestTxt}, + params: &RequestServiceMockCreateTZParams{ctx, userID, requestTxt, fileData, fileName}, expectationOrigins: RequestServiceMockCreateTZExpectationOrigins{origin: minimock.CallerInfo(1)}, } mmCreateTZ.expectations = append(mmCreateTZ.expectations, expectation) @@ -720,17 +772,17 @@ func (mmCreateTZ *mRequestServiceMockCreateTZ) invocationsDone() bool { } // CreateTZ implements mm_service.RequestService -func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, requestTxt string) (u1 uuid.UUID, s1 string, err error) { +func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (u1 uuid.UUID, s1 string, err error) { mm_atomic.AddUint64(&mmCreateTZ.beforeCreateTZCounter, 1) defer mm_atomic.AddUint64(&mmCreateTZ.afterCreateTZCounter, 1) mmCreateTZ.t.Helper() if mmCreateTZ.inspectFuncCreateTZ != nil { - mmCreateTZ.inspectFuncCreateTZ(ctx, userID, requestTxt) + mmCreateTZ.inspectFuncCreateTZ(ctx, userID, requestTxt, fileData, fileName) } - mm_params := RequestServiceMockCreateTZParams{ctx, userID, requestTxt} + mm_params := RequestServiceMockCreateTZParams{ctx, userID, requestTxt, fileData, fileName} // Record call args mmCreateTZ.CreateTZMock.mutex.Lock() @@ -749,7 +801,7 @@ func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, mm_want := mmCreateTZ.CreateTZMock.defaultExpectation.params mm_want_ptrs := mmCreateTZ.CreateTZMock.defaultExpectation.paramPtrs - mm_got := RequestServiceMockCreateTZParams{ctx, userID, requestTxt} + mm_got := RequestServiceMockCreateTZParams{ctx, userID, requestTxt, fileData, fileName} if mm_want_ptrs != nil { @@ -768,6 +820,16 @@ func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, mmCreateTZ.CreateTZMock.defaultExpectation.expectationOrigins.originRequestTxt, *mm_want_ptrs.requestTxt, mm_got.requestTxt, minimock.Diff(*mm_want_ptrs.requestTxt, mm_got.requestTxt)) } + if mm_want_ptrs.fileData != nil && !minimock.Equal(*mm_want_ptrs.fileData, mm_got.fileData) { + mmCreateTZ.t.Errorf("RequestServiceMock.CreateTZ got unexpected parameter fileData, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmCreateTZ.CreateTZMock.defaultExpectation.expectationOrigins.originFileData, *mm_want_ptrs.fileData, mm_got.fileData, minimock.Diff(*mm_want_ptrs.fileData, mm_got.fileData)) + } + + if mm_want_ptrs.fileName != nil && !minimock.Equal(*mm_want_ptrs.fileName, mm_got.fileName) { + mmCreateTZ.t.Errorf("RequestServiceMock.CreateTZ got unexpected parameter fileName, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmCreateTZ.CreateTZMock.defaultExpectation.expectationOrigins.originFileName, *mm_want_ptrs.fileName, mm_got.fileName, minimock.Diff(*mm_want_ptrs.fileName, mm_got.fileName)) + } + } else if mm_want != nil && !minimock.Equal(*mm_want, mm_got) { mmCreateTZ.t.Errorf("RequestServiceMock.CreateTZ got unexpected parameters, expected at\n%s:\nwant: %#v\n got: %#v%s\n", mmCreateTZ.CreateTZMock.defaultExpectation.expectationOrigins.origin, *mm_want, mm_got, minimock.Diff(*mm_want, mm_got)) @@ -780,9 +842,9 @@ func (mmCreateTZ *RequestServiceMock) CreateTZ(ctx context.Context, userID int, return (*mm_results).u1, (*mm_results).s1, (*mm_results).err } if mmCreateTZ.funcCreateTZ != nil { - return mmCreateTZ.funcCreateTZ(ctx, userID, requestTxt) + return mmCreateTZ.funcCreateTZ(ctx, userID, requestTxt, fileData, fileName) } - mmCreateTZ.t.Fatalf("Unexpected call to RequestServiceMock.CreateTZ. %v %v %v", ctx, userID, requestTxt) + mmCreateTZ.t.Fatalf("Unexpected call to RequestServiceMock.CreateTZ. %v %v %v %v %v", ctx, userID, requestTxt, fileData, fileName) return } diff --git a/internal/service/interfaces.go b/internal/service/interfaces.go index c2ca84e..1634290 100644 --- a/internal/service/interfaces.go +++ b/internal/service/interfaces.go @@ -29,7 +29,7 @@ type InviteService interface { } type RequestService interface { - CreateTZ(ctx context.Context, userID int, requestTxt string) (uuid.UUID, string, error) + CreateTZ(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (uuid.UUID, string, error) ApproveTZ(ctx context.Context, requestID uuid.UUID, tzText string, userID int) ([]*model.Supplier, error) GetMailingList(ctx context.Context, userID int) ([]*model.Request, error) GetMailingListByID(ctx context.Context, requestID uuid.UUID, userID int) (*model.RequestDetail, error) diff --git a/internal/service/request.go b/internal/service/request.go index 6c2af6c..769f19d 100644 --- a/internal/service/request.go +++ b/internal/service/request.go @@ -2,12 +2,14 @@ package service import ( "context" + "fmt" "math" "git.techease.ru/Smart-search/smart-search-back/internal/ai" "git.techease.ru/Smart-search/smart-search-back/internal/model" "git.techease.ru/Smart-search/smart-search-back/internal/repository" "git.techease.ru/Smart-search/smart-search-back/pkg/errors" + "git.techease.ru/Smart-search/smart-search-back/pkg/fileparser" "github.com/google/uuid" "github.com/jackc/pgx/v5" ) @@ -42,21 +44,37 @@ func NewRequestService( } } -func (s *requestService) CreateTZ(ctx context.Context, userID int, requestTxt string) (uuid.UUID, string, error) { +func (s *requestService) CreateTZ(ctx context.Context, userID int, requestTxt string, fileData []byte, fileName string) (uuid.UUID, string, error) { + combinedText := requestTxt + + if len(fileData) > 0 && fileName != "" { + fileContent, err := fileparser.ExtractText(fileData, fileName) + if err != nil { + return uuid.Nil, "", err + } + if fileContent != "" { + if combinedText != "" { + combinedText = fmt.Sprintf("%s\n\nСодержимое файла (%s):\n%s", combinedText, fileName, fileContent) + } else { + combinedText = fmt.Sprintf("Содержимое файла (%s):\n%s", fileName, fileContent) + } + } + } + req := &model.Request{ UserID: userID, - RequestTxt: requestTxt, + RequestTxt: combinedText, } if err := s.requestRepo.Create(ctx, req); err != nil { return uuid.Nil, "", err } - if requestTxt == "" { + if combinedText == "" { return req.ID, "", nil } - tzText, err := s.openAI.GenerateTZ(requestTxt) + tzText, err := s.openAI.GenerateTZ(combinedText) if err != nil { if err := s.requestRepo.UpdateWithTZ(ctx, req.ID, "", false); err != nil { return req.ID, "", err diff --git a/pkg/errors/codes.go b/pkg/errors/codes.go index 648c609..e6017e3 100644 --- a/pkg/errors/codes.go +++ b/pkg/errors/codes.go @@ -12,6 +12,8 @@ const ( UserNotFound = "USER_NOT_FOUND" RequestNotFound = "REQUEST_NOT_FOUND" PermissionDenied = "PERMISSION_DENIED" + UnsupportedFileFormat = "UNSUPPORTED_FILE_FORMAT" + FileProcessingError = "FILE_PROCESSING_ERROR" DatabaseError = "DATABASE_ERROR" EncryptionError = "ENCRYPTION_ERROR" diff --git a/pkg/fileparser/parser.go b/pkg/fileparser/parser.go new file mode 100644 index 0000000..3d11386 --- /dev/null +++ b/pkg/fileparser/parser.go @@ -0,0 +1,113 @@ +package fileparser + +import ( + "archive/zip" + "bytes" + "encoding/xml" + "io" + "net/http" + "strings" + + "git.techease.ru/Smart-search/smart-search-back/pkg/errors" +) + +func ExtractText(data []byte, _ string) (string, error) { + if len(data) == 0 { + return "", nil + } + + mimeType := http.DetectContentType(data) + + switch { + case strings.HasPrefix(mimeType, "text/"): + return string(data), nil + case mimeType == "application/zip" || mimeType == "application/octet-stream": + if isDocx(data) { + return extractDocx(data) + } + return "", errors.NewBusinessError(errors.UnsupportedFileFormat, "поддерживаются только текстовые файлы (.txt) и документы Word (.docx)") + default: + return "", errors.NewBusinessError(errors.UnsupportedFileFormat, "неподдерживаемый формат файла: "+mimeType+", поддерживаются .txt и .docx") + } +} + +func isDocx(data []byte) bool { + reader, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return false + } + + for _, file := range reader.File { + if file.Name == "word/document.xml" { + return true + } + } + return false +} + +func extractDocx(data []byte) (string, error) { + reader, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return "", errors.NewInternalError(errors.FileProcessingError, "не удалось прочитать docx файл", err) + } + + var content string + for _, file := range reader.File { + if file.Name == "word/document.xml" { + rc, err := file.Open() + if err != nil { + return "", errors.NewInternalError(errors.FileProcessingError, "не удалось открыть содержимое документа", err) + } + defer func() { _ = rc.Close() }() + + xmlData, err := io.ReadAll(rc) + if err != nil { + return "", errors.NewInternalError(errors.FileProcessingError, "не удалось прочитать содержимое документа", err) + } + + content = extractTextFromXML(xmlData) + break + } + } + + return content, nil +} + +type docxDocument struct { + XMLName xml.Name `xml:"document"` + Body docxBody `xml:"body"` +} + +type docxBody struct { + Paragraphs []docxParagraph `xml:"p"` +} + +type docxParagraph struct { + Runs []docxRun `xml:"r"` +} + +type docxRun struct { + Text string `xml:"t"` +} + +func extractTextFromXML(data []byte) string { + var doc docxDocument + if err := xml.Unmarshal(data, &doc); err != nil { + return "" + } + + var result []string + for _, p := range doc.Body.Paragraphs { + var line []string + for _, r := range p.Runs { + if r.Text != "" { + line = append(line, r.Text) + } + } + if len(line) > 0 { + result = append(result, strings.Join(line, "")) + } + } + + return strings.Join(result, "\n") +} diff --git a/pkg/fileparser/parser_test.go b/pkg/fileparser/parser_test.go new file mode 100644 index 0000000..07650ca --- /dev/null +++ b/pkg/fileparser/parser_test.go @@ -0,0 +1,110 @@ +package fileparser + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestExtractText_EmptyData(t *testing.T) { + result, err := ExtractText(nil, "test.txt") + assert.NoError(t, err) + assert.Empty(t, result) + + result, err = ExtractText([]byte{}, "test.txt") + assert.NoError(t, err) + assert.Empty(t, result) +} + +func TestExtractText_PlainText(t *testing.T) { + content := "Тестовый текст для проверки" + result, err := ExtractText([]byte(content), "document.txt") + + assert.NoError(t, err) + assert.Equal(t, content, result) +} + +func TestExtractText_PlainTextWithNewlines(t *testing.T) { + content := "Первая строка\nВторая строка\nТретья строка" + result, err := ExtractText([]byte(content), "document.txt") + + assert.NoError(t, err) + assert.Equal(t, content, result) +} + +func TestExtractText_RealDocxFile(t *testing.T) { + testdataPath := filepath.Join("testdata", "test_document.docx") + data, err := os.ReadFile(testdataPath) + require.NoError(t, err, "не удалось прочитать тестовый файл") + + result, err := ExtractText(data, "тестовый.docx") + + assert.NoError(t, err) + assert.NotEmpty(t, result, "текст из docx не должен быть пустым") + t.Logf("Извлеченный текст из docx:\n%s", result) +} + +func TestExtractText_DocxWithAnyFilename(t *testing.T) { + testdataPath := filepath.Join("testdata", "test_document.docx") + data, err := os.ReadFile(testdataPath) + require.NoError(t, err) + + result1, err := ExtractText(data, "random_name_without_extension") + assert.NoError(t, err) + assert.NotEmpty(t, result1) + + result2, err := ExtractText(data, "document.pdf") + assert.NoError(t, err) + assert.NotEmpty(t, result2) + + assert.Equal(t, result1, result2, "результат должен быть одинаковым независимо от имени файла") +} + +func TestExtractText_UnsupportedFormat_PDF(t *testing.T) { + pdfHeader := []byte("%PDF-1.4\n") + result, err := ExtractText(pdfHeader, "document.pdf") + + assert.Error(t, err) + assert.Empty(t, result) + assert.Contains(t, err.Error(), "UNSUPPORTED_FILE_FORMAT") +} + +func TestExtractText_UnsupportedFormat_Image(t *testing.T) { + pngHeader := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A} + result, err := ExtractText(pngHeader, "image.png") + + assert.Error(t, err) + assert.Empty(t, result) + assert.Contains(t, err.Error(), "UNSUPPORTED_FILE_FORMAT") +} + +func TestExtractText_InvalidDocx(t *testing.T) { + zipHeader := []byte{0x50, 0x4B, 0x03, 0x04} + fakeZip := append(zipHeader, []byte("not a valid zip content")...) + + result, err := ExtractText(fakeZip, "fake.docx") + + assert.Error(t, err) + assert.Empty(t, result) +} + +func TestIsDocx_ValidDocx(t *testing.T) { + testdataPath := filepath.Join("testdata", "test_document.docx") + data, err := os.ReadFile(testdataPath) + require.NoError(t, err) + + assert.True(t, isDocx(data)) +} + +func TestIsDocx_RegularZip(t *testing.T) { + zipHeader := []byte{0x50, 0x4B, 0x03, 0x04} + assert.False(t, isDocx(zipHeader)) +} + +func TestIsDocx_NotZip(t *testing.T) { + textData := []byte("plain text content") + assert.False(t, isDocx(textData)) +} diff --git a/pkg/fileparser/testdata/test_document.docx b/pkg/fileparser/testdata/test_document.docx new file mode 100644 index 0000000..86bc44b Binary files /dev/null and b/pkg/fileparser/testdata/test_document.docx differ