Murillo преди 3 години
родител
ревизия
552f250fb3
променени са 39 файла, в които са добавени 8054 реда и са изтрити 108 реда
  1. +9
    -0
      .editorconfig
  2. +4
    -0
      .eslintignore
  3. +67
    -0
      .eslintrc.json
  4. +3
    -107
      .gitignore
  5. +11
    -0
      Dockerfile.development
  6. +40
    -1
      README.md
  7. +19
    -0
      babel.config.js
  8. +17
    -0
      docker-compose.yaml
  9. +193
    -0
      jest.config.js
  10. +2
    -0
      jest.setup.ts
  11. +13
    -0
      ormconfig.json
  12. +72
    -0
      package.json
  13. +6
    -0
      prettier.config.js
  14. Двоични данни
      resources/arquivos-exemplos.png
  15. Двоични данни
      resources/gitea-novo-repo.png
  16. Двоични данни
      resources/gitea-template.png
  17. Двоични данни
      resources/renomear-projeto.png
  18. Двоични данни
      src/__tests__/resources/empty-pdf.pdf
  19. +224
    -0
      src/__tests__/resources/recibo-pagamento-data.json
  20. Двоични данни
      src/__tests__/resources/recibo-pagamento.pdf
  21. +107
    -0
      src/__tests__/services/mapaFolha/ExtractDataPDFMapaFolhaService.spec.ts
  22. +22
    -0
      src/config/upload.ts
  23. +5
    -0
      src/containers/index.ts
  24. +5
    -0
      src/containers/providers/pdfToText/IPdfToTextProvider.ts
  25. +3
    -0
      src/containers/providers/pdfToText/dto/IOptionsPdfParseDTO.ts
  26. +32
    -0
      src/containers/providers/pdfToText/implementations/LinuxPdfToText.ts
  27. +13
    -0
      src/containers/providers/pdfToText/implementations/PdfParseProvider.ts
  28. +10
    -0
      src/erros/AppError.ts
  29. +15
    -0
      src/http/controllers/ExempleController.ts
  30. +23
    -0
      src/http/middlewares/error.ts
  31. +12
    -0
      src/http/routes/exemple.routes.ts
  32. +8
    -0
      src/http/routes/index.ts
  33. +22
    -0
      src/http/server.ts
  34. +58
    -0
      src/services/exemple/ExempleService.ts
  35. +16
    -0
      src/utils/date/dates.ts
  36. +33
    -0
      src/utils/format/index.ts
  37. +39
    -0
      src/utils/regex/regexMapaFolha.ts
  38. +16
    -0
      tsconfig.json
  39. +6935
    -0
      yarn.lock

+ 9
- 0
.editorconfig Целия файл

@@ -0,0 +1,9 @@
root = true

[*]
indent_style = space
indent_size = 2
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

+ 4
- 0
.eslintignore Целия файл

@@ -0,0 +1,4 @@
/*.js
node_modules
dist
@types

+ 67
- 0
.eslintrc.json Целия файл

@@ -0,0 +1,67 @@
{
"env": {
"es2020": true,
"node": true,
"jest": true
},
"extends": [
"airbnb-base",
"plugin:@typescript-eslint/recommended",
"prettier/@typescript-eslint",
"plugin:prettier/recommended"
],
"parser": "@typescript-eslint/parser",
"parserOptions": {
"ecmaVersion": 2020,
"sourceType": "module"
},
"plugins": [
"@typescript-eslint",
"prettier"
],
"rules": {
"prettier/prettier": "error",
"no-param-reassign": "off",
"no-plusplus": "off",
"no-await-in-loop": "off",
"no-useless-constructor": "off",
"no-underscore-dangle": "off",
"camelcase": "off",
"no-console": "off",
"class-methods-use-this": "off",

"@typescript-eslint/no-unused-vars": [
"error",
{
"argsIgnorePattern": "^_"
}
],
"@typescript-eslint/naming-convention": [
"error",
{
"selector": "interface",
"format": [
"PascalCase"
],
"custom": {
"regex": "^I[A-Z]",
"match": true
}
}
],
"import/extensions": [
"error",
"ignorePackages",
{
"ts": "never"
}
],
"no-shadow": "off",
"@typescript-eslint/no-shadow": ["error"]
},
"settings": {
"import/resolver": {
"typescript": {}
}
}
}

+ 3
- 107
.gitignore Целия файл

@@ -1,109 +1,5 @@
# ---> Node
# Logs
logs
node_modules
dist
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
*.logs
coverage
*.lcov

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# TypeScript v1 declaration files
typings/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env
.env.test

# parcel-bundler cache (https://parceljs.org/)
.cache

# Next.js build output
.next

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public

# vuepress build output
.vuepress/dist

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

# Stores VSCode versions used for testing VSCode extensions
.vscode-test


+ 11
- 0
Dockerfile.development Целия файл

@@ -0,0 +1,11 @@
# Development image: Node 14 plus the native build tooling and the Poppler
# packages. LinuxPdfToText shells out to `pdftotext`, which ships in poppler-utils.
FROM node:14.15.1

WORKDIR /app

# Refresh the package index and install in ONE layer so a cached, stale
# `apt-get update` layer is never reused by a later install. The index is
# removed afterwards to keep the image small.
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        libpoppler-cpp-dev \
        pkg-config \
        python-dev \
        poppler-utils \
    && rm -rf /var/lib/apt/lists/*

EXPOSE 3333
# The project directory is bind-mounted at /app by docker-compose.yaml, so
# dependencies are installed when the container starts rather than at build time.
CMD yarn && yarn dev:server

+ 40
- 1
README.md Целия файл

@@ -1,2 +1,41 @@
# pdf-extract-Via_Software


<h1 align="center">Extract data from PDF</h1>

# Usando o template
## Criando um projeto novo
1. Acesse a página do Gitea: https://gitea.tron.com.br/Tron_Informatica/pdf-extract-template
2. Localize o botão "Usar este modelo" <br>
![Pagina do gitea - Projeto](https://gitea.tron.com.br/Tron_Informatica/pdf-extract-template/raw/branch/master/resources/gitea-template.png) <br>
3. Preencha os campos e crie o repositório. Marque a opção "Conteúdo Git (Branch padrão)" e lembre-se de trocar o proprietário para 'Tron_Informatica' <br>
![Pagina do gitea - Novo repositorio](https://gitea.tron.com.br/Tron_Informatica/pdf-extract-template/raw/branch/master/resources/gitea-novo-repo.png) <br>
4. clone o novo repositório criado

## Usando o template
1. Troque o nome do projeto nos arquivos: docker-compose.yaml e package.json <br>
![Arquivos para renomear](https://gitea.tron.com.br/Tron_Informatica/pdf-extract-template/raw/branch/master/resources/renomear-projeto.png) <br>
2. Crie seus controllers, services e rotas; deixei alguns arquivos como exemplo <br>
![Arquivos de exemplo](https://gitea.tron.com.br/Tron_Informatica/pdf-extract-template/raw/branch/master/resources/arquivos-exemplos.png) <br>

## Scripts
### Rodando projeto
```console
:~$ yarn docker:server
```

### Exibindo o bash do container
Obs: o projeto precisa estar em execução
```console
:~$ yarn docker:bash
```

### Rodando testes unitários
```console
:~$ yarn docker:test
```

### Instalando dependência
```console
:~$ yarn docker:bash
root@947961d6b834:/app# yarn add <dependência>
```

+ 19
- 0
babel.config.js Целия файл

@@ -0,0 +1,19 @@
// Babel configuration used by the `build` script in package.json: targets the
// Node version running the build, strips TypeScript, rewrites const enums to
// plain objects and enables legacy decorators with metadata (needed by tsyringe's
// @injectable/@inject usage in the services).
module.exports = {
presets: [
['@babel/preset-env', { targets: { node: 'current' } }],
'@babel/preset-typescript',
['const-enum', { "transform": "constObject" }]
],
plugins: [
// Import aliases resolved at build time.
// NOTE(review): the commit's file list shows no src/modules or src/shared
// directory — confirm these aliases are still wanted in this template.
['module-resolver', {
alias: {
'@modules': './src/modules',
'@config': './src/config',
'@shared': './src/shared'
}
}],
'babel-plugin-transform-typescript-metadata',
['@babel/plugin-proposal-decorators', { 'legacy': true }],
['@babel/plugin-proposal-class-properties', { 'loose': true }]
],
}

+ 17
- 0
docker-compose.yaml Целия файл

@@ -0,0 +1,17 @@
version: "3"
services:
node:
build:
context: ./
dockerfile: Dockerfile.development
container_name: nome-do-projeto
tty: true
volumes:
- ./:/app
ports:
- "3333:3333"
networks:
- nome-do-projeto
networks:
nome-do-projeto:
driver: bridge

+ 193
- 0
jest.config.js Целия файл

@@ -0,0 +1,193 @@
// For a detailed explanation regarding each configuration property, visit:
// https://jestjs.io/docs/en/configuration.html

module.exports = {
// All imported modules in your tests should be mocked automatically
// automock: false,

// Stop running tests after `n` failures
// bail: 0,

// The directory where Jest should store its cached dependency information
// cacheDirectory: "C:\\Users\\danil\\AppData\\Local\\Temp\\jest",

// Automatically clear mock calls and instances between every test
clearMocks: true,

// Indicates whether the coverage information should be collected while executing the test
collectCoverage: true,

// An array of glob patterns indicating a set of files for which coverage information should be collected
collectCoverageFrom: [
'<rootDir>/src/services/**/*.ts',
],

// The directory where Jest should output its coverage files
coverageDirectory: 'coverage',

// An array of regexp pattern strings used to skip coverage collection
// coveragePathIgnorePatterns: [
// "\\\\node_modules\\\\"
// ],

// Indicates which provider should be used to instrument code for coverage
// coverageProvider: "babel",

// A list of reporter names that Jest uses when writing coverage reports
coverageReporters: [
"text-summary",
"lcov",
],

// An object that configures minimum threshold enforcement for coverage results
// coverageThreshold: undefined,

// A path to a custom dependency extractor
// dependencyExtractor: undefined,

// Make calling deprecated APIs throw helpful error messages
// errorOnDeprecated: false,

// Force coverage collection from ignored files using an array of glob patterns
// forceCoverageMatch: [],

// A path to a module which exports an async function that is triggered once before all test suites
// globalSetup: undefined,

// A path to a module which exports an async function that is triggered once after all test suites
// globalTeardown: undefined,

// A set of global variables that need to be available in all test environments
// globals: {},

// The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
// maxWorkers: "50%",

// An array of directory names to be searched recursively up from the requiring module's location
// moduleDirectories: [
// "node_modules"
// ],

// An array of file extensions your modules use
// moduleFileExtensions: [
// "js",
// "json",
// "jsx",
// "ts",
// "tsx",
// "node"
// ],

// A map from regular expressions to module names or to arrays of module names that allow to stub out resources with a single module
//moduleNameMapper: pathsToModuleNameMapper(compilerOptions.paths, {prefix: '<rootDir>/src/'}),

// An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
// modulePathIgnorePatterns: [],

// Activates notifications for test results
// notify: false,

// An enum that specifies notification mode. Requires { notify: true }
// notifyMode: "failure-change",

// A preset that is used as a base for Jest's configuration
preset: 'ts-jest',

// Run tests from one or more projects
// projects: undefined,

// Use this configuration option to add custom reporters to Jest
// reporters: undefined,

// Automatically reset mock state between every test
// resetMocks: false,

// Reset the module registry before running each individual test
// resetModules: false,

// A path to a custom resolver
// resolver: undefined,

// Automatically restore mock state between every test
// restoreMocks: false,

// The root directory that Jest should scan for tests and modules within
// rootDir: './src',

// A list of paths to directories that Jest should use to search for files in
// roots: [
// "<rootDir>"
// ],

// Allows you to use a custom runner instead of Jest's default test runner
// runner: "jest-runner",

// The paths to modules that run some code to configure or set up the testing environment before each test
// setupFiles: [],

// A list of paths to modules that run some code to configure or set up the testing framework before each test
setupFilesAfterEnv: [
"./jest.setup.ts"
],

// The number of seconds after which a test is considered as slow and reported as such in the results.
// slowTestThreshold: 5,

// A list of paths to snapshot serializer modules Jest should use for snapshot testing
// snapshotSerializers: [],

// The test environment that will be used for testing
testEnvironment: "node",

// Options that will be passed to the testEnvironment
// testEnvironmentOptions: {},

// Adds a location field to test results
// testLocationInResults: false,

// The glob patterns Jest uses to detect test files
testMatch: [
"**/*.spec.ts",
],

// An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
// testPathIgnorePatterns: [
// "\\\\node_modules\\\\"
// ],

// The regexp pattern or array of patterns that Jest uses to detect test files
// testRegex: [],

// This option allows the use of a custom results processor
// testResultsProcessor: undefined,

// This option allows use of a custom test runner
// testRunner: "jasmine2",

// This option sets the URL for the jsdom environment. It is reflected in properties such as location.href
// testURL: "http://localhost",

// Setting this value to "fake" allows the use of fake timers for functions such as "setTimeout"
// timers: "real",

// A map from regular expressions to paths to transformers
// transform: undefined,

// An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
// transformIgnorePatterns: [
// "\\\\node_modules\\\\",
// "\\.pnp\\.[^\\\\]+$"
// ],

// An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
// unmockedModulePathPatterns: undefined,

// Indicates whether each individual test should be reported during the run
// verbose: undefined,

// An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
// watchPathIgnorePatterns: [],

// Whether to use watchman for file crawling
// watchman: true,
};

+ 2
- 0
jest.setup.ts Целия файл

@@ -0,0 +1,2 @@
import 'reflect-metadata';
import './src/containers';

+ 13
- 0
ormconfig.json Целия файл

@@ -0,0 +1,13 @@
{
"type": "mongodb",
"host": "mongo",
"port": 27017,
"username": "",
"password": "",
"database": "db-extract-pdf",
"synchronize": false,
"logging": false,
"entities": [
"src/modules/**/typeorm/entities/*.ts"
]
}

+ 72
- 0
package.json Целия файл

@@ -0,0 +1,72 @@
{
"name": "nome-do-projeto",
"version": "1.0.0",
"description": "Extraçao de dados de arquivos PDF",
"main": "server.js",
"author": "Daniel Souza <daniel.souza@tron.com.br>",
"license": "MIT",
"private": true,
"scripts": {
"build": "rm -rf ./dist && babel src --extensions \".js,.ts\" --out-dir dist --copy-files",
"start": "NODE_ENV=production node ./src/http/server.ts",
"dev:server": "cross-env NODE_ENV=development ts-node-dev -r tsconfig-paths/register --inspect --respawn --transpile-only --ignore-watch node_modules ./src/http/server.ts",
"docker:run": "docker-compose -f ./docker-compose.yaml down && docker-compose -f ./docker-compose.yaml up --build -d",
"docker:logs": "docker logs -f nome-do-projeto",
"docker:server": "yarn docker:run && yarn docker:logs",
"docker:bash": "docker exec -it nome-do-projeto bash",
"docker:test": "docker-compose -f ./docker-compose.yaml up --build -d && docker exec -it nome-do-projeto yarn test",
"test": "jest"
},
"dependencies": {
"@types/mongodb": "^3.6.0",
"axios": "^0.20.0",
"cors": "^2.8.5",
"date-fns": "^2.16.1",
"dotenv": "^8.2.0",
"express": "^4.17.1",
"express-async-errors": "^3.1.1",
"mongodb": "^3.6.3",
"multer": "^1.4.2",
"pdf-parse": "^1.1.1",
"reflect-metadata": "^0.1.13",
"tsyringe": "^4.3.0",
"typeorm": "^0.2.29",
"uuid": "^8.3.1",
"yup": "^0.31.1"
},
"devDependencies": {
"@babel/cli": "^7.12.1",
"@babel/core": "^7.12.3",
"@babel/node": "^7.12.1",
"@babel/plugin-proposal-class-properties": "^7.12.1",
"@babel/plugin-proposal-decorators": "^7.12.1",
"@babel/plugin-transform-typescript": "^7.12.1",
"@babel/preset-env": "^7.12.1",
"@babel/preset-typescript": "^7.12.1",
"@types/cors": "^2.8.7",
"@types/express": "^4.17.8",
"@types/jest": "^26.0.19",
"@types/multer": "^1.4.4",
"@types/pdf-parse": "^1.1.0",
"@types/pdfjs-dist": "^2.1.7",
"@types/uuid": "^8.3.0",
"@types/yup": "^0.29.9",
"@typescript-eslint/eslint-plugin": "^4.4.0",
"@typescript-eslint/parser": "^4.4.0",
"babel-plugin-module-resolver": "^4.0.0",
"babel-plugin-transform-typescript-metadata": "^0.3.1",
"babel-preset-const-enum": "^1.0.0",
"cross-env": "^7.0.2",
"eslint": "6.8.0",
"eslint-config-airbnb-base": "^14.2.0",
"eslint-config-prettier": "^6.11.0",
"eslint-import-resolver-typescript": "^2.2.1",
"eslint-plugin-import": "^2.21.2",
"eslint-plugin-prettier": "^3.1.4",
"jest": "^26.6.3",
"prettier": "^2.1.0",
"ts-jest": "^26.4.4",
"ts-node-dev": "^1.0.0-pre.63",
"typescript": "^4.0.3"
}
}

+ 6
- 0
prettier.config.js Целия файл

@@ -0,0 +1,6 @@
// Prettier formatting rules; enforced through ESLint by the
// "prettier/prettier": "error" rule in .eslintrc.json.
module.exports = {
singleQuote: true,
trailingComma: 'all',
arrowParens: 'avoid',
printWidth: 100,
}

Двоични данни
resources/arquivos-exemplos.png Целия файл

Преди След
Ширина: 506  |  Височина: 112  |  Големина: 12 KiB

Двоични данни
resources/gitea-novo-repo.png Целия файл

Преди След
Ширина: 725  |  Височина: 698  |  Големина: 51 KiB

Двоични данни
resources/gitea-template.png Целия файл

Преди След
Ширина: 1023  |  Височина: 784  |  Големина: 104 KiB

Двоични данни
resources/renomear-projeto.png Целия файл

Преди След
Ширина: 480  |  Височина: 287  |  Големина: 27 KiB

Двоични данни
src/__tests__/resources/empty-pdf.pdf Целия файл


+ 224
- 0
src/__tests__/resources/recibo-pagamento-data.json Целия файл

@@ -0,0 +1,224 @@
[
{
"company": {
"name": "INSTITUTO DE EDUCACAO VOOS OLYMPIO LTDA",
"code": "00261",
"cnpj": "31001804000139",
"refEndDate": "2020-10-31",
"refStartDate": "2020-10-01",
"address": "COND SOLAR DE BRASILIA QD 02 BL B, 207 LOTES 3 E 4"
},
"employee": {
"code": "000001",
"name": "RAPHAEL VOOS LENZI",
"salary": 3000,
"role": "SUPERVISOR PEDAGOGICO",
"admissionDate": "01/09/2018",
"baseINSS": 3000,
"aliquotINSS": 9.3873,
"baseFGTS": 3000,
"valueFGTS": 240,
"baseIRRF": 2718.38,
"events": [
{
"code": "001",
"description": "Salário Base",
"discount": 0,
"eventReference": "",
"earnings": 3000
},
{
"code": "615",
"description": "TAXA ASSISTENCIAL 2%",
"discount": 60,
"eventReference": "",
"earnings": 0
},
{
"code": "903",
"description": "INSS Folha",
"discount": 281.62,
"eventReference": "",
"earnings": 0
},
{
"code": "914",
"description": "IRRF Folha",
"discount": 61.08,
"eventReference": "",
"earnings": 0
}
]
}
},
{
"company": {
"name": "INSTITUTO DE EDUCACAO VOOS OLYMPIO LTDA",
"code": "00261",
"cnpj": "31001804000139",
"refEndDate": "2020-10-31",
"refStartDate": "2020-10-01",
"address": "COND SOLAR DE BRASILIA QD 02 BL B, 207 LOTES 3 E 4"
},
"employee": {
"code": "000005",
"name": "MARIA BÁRBARA SEIXO DE BRITTO DE MELO",
"salary": 1536.9,
"role": "ASSISTENTE ADMINISTRATIVO",
"admissionDate": "02/01/2019",
"baseINSS": 640.37,
"aliquotINSS": 7.5,
"baseFGTS": 640.37,
"valueFGTS": 51.22,
"baseIRRF": 592.35,
"events": [
{
"code": "001",
"description": "Salário Base",
"discount": 0,
"eventReference": "",
"earnings": 640.37
},
{
"code": "637",
"description": "PLANO DE SAÚDE",
"discount": 202.51,
"eventReference": "",
"earnings": 0
},
{
"code": "604",
"description": "Vale Transporte 6%",
"discount": 38.42,
"eventReference": "",
"earnings": 0
},
{
"code": "615",
"description": "TAXA ASSISTENCIAL 2%",
"discount": 30.72,
"eventReference": "",
"earnings": 0
},
{
"code": "903",
"description": "INSS Folha",
"discount": 48.02,
"eventReference": "",
"earnings": 0
}
]
}
},
{
"company": {
"name": "INSTITUTO DE EDUCACAO VOOS OLYMPIO LTDA",
"code": "00261",
"cnpj": "31001804000139",
"refEndDate": "2020-10-31",
"refStartDate": "2020-10-01",
"address": "COND SOLAR DE BRASILIA QD 02 BL B, 207 LOTES 3 E 4"
},
"employee": {
"code": "000011",
"name": "VALDIRA PEREIRA",
"salary": 1229.52,
"role": "AUXILIAR DE SERVIÇOS GERAIS",
"admissionDate": "16/10/2019",
"baseINSS": 512.29,
"aliquotINSS": 7.5,
"baseFGTS": 512.29,
"valueFGTS": 40.98,
"baseIRRF": 284.28,
"events": [
{
"code": "001",
"description": "Salário Base",
"discount": 0,
"eventReference": "",
"earnings": 512.29
},
{
"code": "599",
"description": "Salário Família",
"discount": 0,
"eventReference": "001,00",
"earnings": 48.62
},
{
"code": "604",
"description": "Vale Transporte 6%",
"discount": 30.73,
"eventReference": "",
"earnings": 0
},
{
"code": "615",
"description": "TAXA ASSISTENCIAL 2%",
"discount": 24.6,
"eventReference": "",
"earnings": 0
},
{
"code": "903",
"description": "INSS Folha",
"discount": 38.42,
"eventReference": "",
"earnings": 0
}
]
}
},
{
"company": {
"name": "INSTITUTO DE EDUCACAO VOOS OLYMPIO LTDA",
"code": "00261",
"cnpj": "31001804000139",
"refEndDate": "2020-10-31",
"refStartDate": "2020-10-01",
"address": "COND SOLAR DE BRASILIA QD 02 BL B, 207 LOTES 3 E 4"
},
"employee": {
"code": "000012",
"name": "BRUNO LINS VOOS",
"salary": 1536.9,
"role": "CONSULTOR DE VENDAS",
"admissionDate": "13/01/2020",
"baseINSS": 640.37,
"aliquotINSS": 7.5,
"baseFGTS": 640.37,
"valueFGTS": 51.22,
"baseIRRF": 592.35,
"events": [
{
"code": "001",
"description": "Salário Base",
"discount": 0,
"eventReference": "",
"earnings": 640.37
},
{
"code": "604",
"description": "Vale Transporte 6%",
"discount": 38.42,
"eventReference": "",
"earnings": 0
},
{
"code": "615",
"description": "TAXA ASSISTENCIAL 2%",
"discount": 30.72,
"eventReference": "",
"earnings": 0
},
{
"code": "903",
"description": "INSS Folha",
"discount": 48.02,
"eventReference": "",
"earnings": 0
}
]
}
}
]

Двоични данни
src/__tests__/resources/recibo-pagamento.pdf Целия файл


+ 107
- 0
src/__tests__/services/mapaFolha/ExtractDataPDFMapaFolhaService.spec.ts Целия файл

@@ -0,0 +1,107 @@
import { container } from 'tsyringe';
import path from 'path';
import { promises as fs } from 'fs';
import { Readable } from 'stream';
import AppError from '../../../erros/AppError';
import ExtractDataPDFMapaFolhaService from '../../../services/mapaFolha/ExtractDataPDFMapaFolhaService';
import reciboPagamentoData from '../../resources/recibo-pagamento-data.json';

// Jest suite for ExtractDataPDFMapaFolhaService. Each case builds an
// Express.Multer.File by hand from a fixture under src/__tests__/resources and
// passes it to the service's execute() method.
// NOTE(review): ExtractDataPDFMapaFolhaService is not among the files listed in
// this commit (only services/exemple/ExempleService.ts is) — confirm the import resolves.
describe('Mapa Folha - ExtractDataPDFMapaFolhaService', () => {
// Undo any spies/mocks installed by a test so cases stay independent.
afterEach(() => {
jest.restoreAllMocks();
});

// Happy path: the payslip PDF fixture must yield every record in recibo-pagamento-data.json.
it('Deve ser capaz de extrair os dados do PDF do mapa da folha ', async () => {
const extractDataPDFMapaFolha = container.resolve(ExtractDataPDFMapaFolhaService);

const filePath = path.resolve(__dirname, '..', '..', 'resources', 'recibo-pagamento.pdf');

const file = await fs.readFile(filePath);
const { size } = await fs.stat(filePath);
const readableStream = new Readable();

// Hand-built stand-in for what multer would produce for an uploaded file.
const fileMulter: Express.Multer.File = {
fieldname: 'pdf',
originalname: 'report-mapa-folha.pdf',
encoding: 'utf-8',
mimetype: 'application/pdf',
size,
stream: readableStream,
destination: filePath,
filename: 'report-mapa-folha.pdf',
path: filePath,
buffer: file,
};

const data = await extractDataPDFMapaFolha.execute({
files: [fileMulter],
});

// arrayContaining: every expected record must be present; extra records are tolerated.
expect(data).toEqual(expect.arrayContaining(reciboPagamentoData));
});

// Missing `files` property must be rejected with an AppError.
it('Deve gerar uma exceção caso não seja passado o arquivo ', async () => {
const params = {} as { files: Express.Multer.File[] };

const extractDataPDFMapaFolha = container.resolve(ExtractDataPDFMapaFolhaService);

await expect(extractDataPDFMapaFolha.execute(params)).rejects.toBeInstanceOf(AppError);
});

// A non-PDF upload (the JSON fixture, mimetype application/json) must be rejected with an AppError.
it('Deve gerar uma exceção caso o arquivo não seja um PDF 1', async () => {
const extractDataPDFMapaFolha = container.resolve(ExtractDataPDFMapaFolhaService);

const filePath = path.resolve(__dirname, '..', '..', 'resources', 'recibo-pagamento-data.json');

const file = await fs.readFile(filePath);
const { size } = await fs.stat(filePath);
const readableStream = new Readable();

const fileMulter: Express.Multer.File = {
fieldname: 'pdf',
originalname: 'recibo-pagamento-data.json',
encoding: 'utf-8',
mimetype: 'application/json',
size,
stream: readableStream,
destination: filePath,
filename: 'recibo-pagamento-data.json',
path: filePath,
buffer: file,
};

await expect(extractDataPDFMapaFolha.execute({ files: [fileMulter] })).rejects.toBeInstanceOf(
AppError,
);
});

// A valid PDF that does not match the expected report must produce an empty result, not an error.
it('Não deve gerar uma exceção caso receba um PDF diferente do experado ', async () => {
const extractDataPDFMapaFolha = container.resolve(ExtractDataPDFMapaFolhaService);

const filePath = path.resolve(__dirname, '..', '..', 'resources', 'empty-pdf.pdf');

const file = await fs.readFile(filePath);
const { size } = await fs.stat(filePath);

const readableStream = new Readable();

const fileMulter: Express.Multer.File = {
fieldname: 'pdf',
originalname: 'empty-pdf.pdf',
encoding: 'utf-8',
mimetype: 'application/pdf',
size,
stream: readableStream,
destination: filePath,
filename: 'empty-pdf.pdf',
path: filePath,
buffer: file,
};

const data = await extractDataPDFMapaFolha.execute({
files: [fileMulter],
});

expect(data).toEqual([]);
});
});

+ 22
- 0
src/config/upload.ts Целия файл

@@ -0,0 +1,22 @@
import multer from 'multer';
import crypto from 'crypto';
import os from 'os';

/** Directory that receives uploaded files: the operating system's temp directory. */
const tmpFolder = os.tmpdir();

/**
 * Multer disk storage that writes uploads into `tmpFolder`.
 *
 * The stored name is a random 10-byte hex prefix followed by the original file
 * name, with every character outside digits, ASCII letters, `-` and `.`
 * replaced by `-`.
 */
const storage = multer.diskStorage({
  destination: tmpFolder,
  filename: (_request, file, callback) => {
    const randomPrefix = crypto.randomBytes(10).toString('hex');
    const sanitizedName = file.originalname.replace(/[^0-9-a-zA-Z.]/g, '-');

    return callback(null, `${randomPrefix}-${sanitizedName}`);
  },
});

/** Upload settings keyed by storage strategy; `tempdir` is passed directly to `multer()`. */
export default {
  tempdir: {
    directory: tmpFolder,
    storage,
  },
};

+ 5
- 0
src/containers/index.ts Целия файл

@@ -0,0 +1,5 @@
import { container } from 'tsyringe';
import IPdfToTextProvider from './providers/pdfToText/IPdfToTextProvider';
import LinuxPdfToText from './providers/pdfToText/implementations/LinuxPdfToText';

// Binds the 'PdfToTextProvider' injection token (used via @inject in the services)
// to the pdftotext-based implementation, registered as a singleton.
container.registerSingleton<IPdfToTextProvider>('PdfToTextProvider', LinuxPdfToText);

+ 5
- 0
src/containers/providers/pdfToText/IPdfToTextProvider.ts Целия файл

@@ -0,0 +1,5 @@
import IOptionsPdfParseDTO from './dto/IOptionsPdfParseDTO';

/**
 * Contract for anything that can turn a PDF file on disk into plain text.
 */
export default interface IPdfToTextProvider {
/**
 * Extracts the text of the PDF at `filePath`.
 *
 * @param filePath - Path of the PDF file to read.
 * @param options - Optional extraction settings (see IOptionsPdfParseDTO).
 * @returns A promise resolving to the extracted text.
 */
extract(filePath: string, options?: IOptionsPdfParseDTO): Promise<string>;
}

+ 3
- 0
src/containers/providers/pdfToText/dto/IOptionsPdfParseDTO.ts Целия файл

@@ -0,0 +1,3 @@
/** Optional settings accepted by IPdfToTextProvider.extract. */
export default interface IOptionsPdfParseDTO {
/**
 * When set, restricts extraction to this single page. LinuxPdfToText passes
 * it to pdftotext as both the first (-f) and last (-l) page.
 */
pageNumber?: number;
}

+ 32
- 0
src/containers/providers/pdfToText/implementations/LinuxPdfToText.ts Целия файл

@@ -0,0 +1,32 @@
import fs from 'fs';
import { v4 } from 'uuid';
import os from 'os';
import { exec } from 'child_process';
import path from 'path';

import IPdfToTextProvider from '../IPdfToTextProvider';
import IOptionsPdfParseDTO from '../dto/IOptionsPdfParseDTO';

/**
 * Quotes `value` as a single POSIX-shell word: wraps it in single quotes and
 * rewrites every embedded `'` as `'\''`, so spaces and shell metacharacters in
 * a path are passed through literally instead of being interpreted.
 */
const quoteShellArgument = (value: string): string => `'${value.replace(/'/g, "'\\''")}'`;

/**
 * IPdfToTextProvider backed by the `pdftotext` command-line tool.
 */
export default class LinuxPdfToText implements IPdfToTextProvider {
  /**
   * Runs `pdftotext -layout` on `filePath`, writing to a uniquely named file in
   * the OS temp directory, and returns that file's content as a string.
   *
   * @param filePath - Path of the PDF to convert.
   * @param options - `pageNumber`, when defined, limits conversion to that page
   *   (passed as both `-f` and `-l`).
   * @returns The extracted text.
   * @throws Rejects with the `exec` error when the command fails, or with the
   *   file-system error when the output file cannot be read.
   */
  public async extract(filePath: string, options: IOptionsPdfParseDTO = {}): Promise<string> {
    const tempTextFile = path.resolve(os.tmpdir(), v4());
    const { pageNumber } = options;

    // Number() keeps a valid page number unchanged while guaranteeing that only a
    // numeric literal (or NaN) is ever spliced into the command line.
    const pageNumberOption =
      pageNumber !== undefined ? `-f ${Number(pageNumber)} -l ${Number(pageNumber)}` : '';

    // Both paths are quoted: filePath may derive from an uploaded file name.
    const command = `pdftotext -layout ${pageNumberOption} ${quoteShellArgument(
      filePath,
    )} ${quoteShellArgument(tempTextFile)}`;

    try {
      await new Promise<void>((resolve, reject) => {
        exec(command, err => {
          if (err) {
            reject(err);
            return;
          }

          resolve();
        });
      });

      // Reading outside the exec callback lets a read failure reject this
      // method's promise instead of leaving it pending forever.
      const contentFile = await fs.promises.readFile(tempTextFile);

      return contentFile.toString();
    } finally {
      // Best-effort cleanup on success and failure alike; the file may not exist
      // when pdftotext itself failed, so a failed unlink is deliberately ignored.
      await fs.promises.unlink(tempTextFile).catch(() => undefined);
    }
  }
}

+ 13
- 0
src/containers/providers/pdfToText/implementations/PdfParseProvider.ts Целия файл

@@ -0,0 +1,13 @@
import pdfParse from 'pdf-parse';
import fs from 'fs';
import IPdfToTextProvider from '../IPdfToTextProvider';

/**
 * IPdfToTextProvider backed by the `pdf-parse` library.
 */
export default class PdfParseProvider implements IPdfToTextProvider {
  /**
   * Reads the file at `filePath` and returns the `text` field of the
   * `pdf-parse` result.
   *
   * @param filePath - Path of the PDF to parse.
   * @returns The text reported by `pdf-parse`.
   * @throws Rejects when the file cannot be read or `pdf-parse` fails.
   */
  public async extract(filePath: string): Promise<string> {
    // Asynchronous read: a synchronous readFileSync here would block the event
    // loop (and every other request) while the file is loaded.
    const dataBuffer = await fs.promises.readFile(filePath);

    const data = await pdfParse(dataBuffer);

    return data.text;
  }
}

+ 10
- 0
src/erros/AppError.ts Целия файл

@@ -0,0 +1,10 @@
/**
 * Application-level error carrying an HTTP status code.
 *
 * Extends the built-in `Error` so thrown instances have a stack trace and
 * satisfy `instanceof Error`, while remaining `instanceof AppError` for the
 * error middleware.
 */
export default class AppError extends Error {
  /** HTTP status code to respond with. */
  public readonly statusCode: number;

  /**
   * @param message - Human-readable description returned to the client.
   * @param statusCode - HTTP status code; defaults to 400.
   */
  public constructor(message: string, statusCode = 400) {
    super(message);

    // Restores the prototype chain when compiled to a target where extending
    // built-ins breaks `instanceof` (e.g. ES5 output).
    Object.setPrototypeOf(this, new.target.prototype);

    // Error defines `message` as non-enumerable; re-define it as enumerable so the
    // object serializes ({ message, statusCode }) exactly as it did before this
    // class extended Error.
    Object.defineProperty(this, 'message', {
      value: message,
      enumerable: true,
      writable: true,
      configurable: true,
    });

    // Non-enumerable so the name shows in stack traces without leaking into JSON.
    Object.defineProperty(this, 'name', {
      value: 'AppError',
      enumerable: false,
      writable: true,
      configurable: true,
    });

    this.statusCode = statusCode;
  }
}

+ 15
- 0
src/http/controllers/ExempleController.ts Целия файл

@@ -0,0 +1,15 @@
import { Request, Response } from 'express';
import { container } from 'tsyringe';
import ExempleService from '../../services/exemple/ExempleService';

/**
 * HTTP controller for the example PDF endpoint.
 */
export default class ExempleController {
  /**
   * Collects the uploaded files from the request, runs ExempleService on them
   * and responds with the service result as JSON.
   *
   * `request.files` is an array when the route uses `upload.array(...)` and a
   * field-name map otherwise; in the map case the `pdf` field is used.
   *
   * @param request - Express request, expected to have been processed by multer.
   * @param response - Express response used to send the JSON payload.
   * @returns The response after `json()` has been called.
   */
  public async exemple(request: Request, response: Response): Promise<Response> {
    const uploaded = request.files;

    // `request.files` is undefined when no multipart body was parsed. Optional
    // chaining avoids a TypeError (reported as a 500) so the service can answer
    // with its own "File is required" AppError instead.
    const files = Array.isArray(uploaded) ? uploaded : uploaded?.pdf;

    const exemple = container.resolve(ExempleService);

    const responseData = await exemple.execute({ files });

    return response.json(responseData);
  }
}

+ 23
- 0
src/http/middlewares/error.ts Целия файл

@@ -0,0 +1,23 @@
import { Response, Request, NextFunction } from 'express';
import AppError from '../../erros/AppError';

/**
 * Express error-handling middleware.
 *
 * An AppError is reported with its own status code and message. Anything else
 * is logged to stderr and answered with a generic 500 so internal details are
 * not sent to the client.
 *
 * @param err - The error raised while handling the request.
 * @param _request - Unused; present to match Express's four-argument signature.
 * @param response - Response used to send the JSON error payload.
 * @param _next - Unused; present to match Express's four-argument signature.
 * @returns The response after the JSON payload has been sent.
 */
export default function error(
  err: Error,
  _request: Request,
  response: Response,
  _next: NextFunction,
): Response {
  // Unexpected failure: log it and hide the details from the caller.
  if (!(err instanceof AppError)) {
    console.error(err);

    const fallbackBody = { status: 'error', message: 'Internal server error' };

    return response.status(500).json(fallbackBody);
  }

  // Known application error: echo its status code and message.
  const { statusCode, message } = err;

  return response.status(statusCode).json({ status: 'error', message });
}

+ 12
- 0
src/http/routes/exemple.routes.ts Целия файл

@@ -0,0 +1,12 @@
import { Router } from 'express';
import multer from 'multer';
import uploadConfig from '../../config/upload';
import ExempleController from '../controllers/ExempleController';

// Router for the example endpoint. Uploads are handled by multer configured with
// the temp-directory storage from config/upload.
const exempleRoutes = Router();
const exempleController = new ExempleController();
const upload = multer(uploadConfig.tempdir);

// Accepts any number of files under the multipart field "pdf".
// NOTE(review): routes/index.ts mounts this router under '/exemple', so this
// handler appears to answer POST /exemple/exemple — confirm the doubled segment is intended.
exempleRoutes.post('/exemple', upload.array('pdf'), exempleController.exemple);

export default exempleRoutes;

+ 8
- 0
src/http/routes/index.ts Целия файл

@@ -0,0 +1,8 @@
import { Router } from 'express';
import exempleRoutes from './exemple.routes';

// Root router: aggregates the feature routers and is mounted on the app in server.ts.
const routes = Router();

// Everything defined in exemple.routes.ts is served under the '/exemple' prefix.
routes.use('/exemple', exempleRoutes);

export default routes;

+ 22
- 0
src/http/server.ts Целия файл

@@ -0,0 +1,22 @@
import 'reflect-metadata';
import 'express-async-errors';
import 'dotenv/config';
import '../containers';

import express from 'express';
import cors from 'cors';
import net from 'net';
import error from './middlewares/error';
import routes from './routes';

// Application bootstrap. Registration order matters: body parsing and CORS come
// first, then the routes, and the error handler last so it receives errors
// raised by the routes.
const app = express();
app.use(cors());
// JSON bodies up to 50MB are accepted.
app.use(express.json({ limit: '50MB' }));
app.use(routes);
app.use(error);

// Port 3333 is hard-coded; it matches EXPOSE in Dockerfile.development and the
// port mapping in docker-compose.yaml.
const listener = app.listen(3333, () => {
const { port } = listener.address() as net.AddressInfo;

console.log(`Server started on port ${port}`);
});

+ 58
- 0
src/services/exemple/ExempleService.ts Целия файл

@@ -0,0 +1,58 @@
import { inject, injectable } from 'tsyringe';
import AppError from '../../erros/AppError';
import IPdfToTextProvider from '../../containers/providers/pdfToText/IPdfToTextProvider';
import regexMapaFolha from '../../utils/regex/regexMapaFolha';
import * as format from '../../utils/format';

/** Input of ExempleService.execute: the files uploaded through multer. */
interface IRequest {
files: Express.Multer.File[];
}

/**
 * Company header data plus the raw employee section text.
 * NOTE(review): not referenced anywhere in this file — confirm whether it is
 * still needed in the template.
 */
interface ICompanyContent {
name: string;
code: string;
cnpj: string;
refEndDate: string;
refStartDate: string;
address: string;
contentEmployee: string;
}

/** Output of ExempleService.execute: one entry per file with its stored name and extracted text. */
type IResponse = Array<{
fileName: string;
content: string;
}>;

/**
 * Example service: converts every uploaded PDF to text using the injected
 * PdfToTextProvider.
 */
@injectable()
export default class ExempleService {
  constructor(
    @inject('PdfToTextProvider')
    private pdfToTextProvider: IPdfToTextProvider,
  ) {}

  /**
   * Validates the uploads and extracts the text of each one, in upload order.
   *
   * @param files - Files produced by multer; each must have mimetype `application/pdf`.
   * @returns One `{ fileName, content }` entry per file.
   * @throws AppError `File is required` when `files` is missing or empty.
   * @throws AppError `Only PDF is accepted` when any file is not a PDF.
   */
  public async execute({ files }: IRequest): Promise<IResponse> {
    // `upload.array()` yields an empty array when the request carries no file,
    // so an empty list has to be rejected as well as a missing one.
    if (!files || files.length === 0) {
      throw new AppError('File is required');
    }

    const hasNonPdf = files.some(file => file.mimetype !== 'application/pdf');

    if (hasNonPdf) {
      throw new AppError('Only PDF is accepted');
    }

    const response: IResponse = [];

    // Files are converted one at a time; each conversion is awaited before the next starts.
    for (let indexFile = 0; indexFile < files.length; indexFile++) {
      const file = files[indexFile];
      const contentFile = await this.pdfToTextProvider.extract(file.path);

      response.push({
        fileName: file.filename,
        content: contentFile,
      });
    }

    return response;
  }
}

+ 16
- 0
src/utils/date/dates.ts Целия файл

@@ -0,0 +1,16 @@
/**
 * Lookup table pairing each Portuguese month name (lower case) with its
 * 1-based month number. Built from the ordered name list so the number is
 * always the position in the calendar.
 */
const dates = [
  'janeiro',
  'fevereiro',
  'março',
  'abril',
  'maio',
  'junho',
  'julho',
  'agosto',
  'setembro',
  'outubro',
  'novembro',
  'dezembro',
].map((description, position) => ({ description, value: position + 1 }));

export default dates;

+ 33
- 0
src/utils/format/index.ts Целия файл

@@ -0,0 +1,33 @@
import dates from '../date/dates';

/**
 * Removes every non-digit character from a masked registry number (e.g. a CNPJ).
 *
 * @param cnpj Masked value; may be null/undefined at runtime.
 * @returns Only the digits of the input, or '' when the input is missing or has no digits.
 */
export const registryRemoveMask = (cnpj: string): string => {
  if (cnpj === null || cnpj === undefined) {
    return '';
  }

  return cnpj.replace(/\D/gm, '');
};

/**
 * Converts a competence description such as "Janeiro de 2021" into "MM/YYYY"
 * form ("01/2021").
 *
 * @param competenceDescription Text in the form "<month name> de <year>".
 * @returns The zero-padded month number and the year joined by "/", or '' when
 *          the separator is missing, the month name is unknown or the year is empty.
 */
export const competenceDescriptionToDate = (competenceDescription: string): string => {
  // The first group is greedy, so the text is split on the LAST "de". A plain
  // split('de') would cut inside "dezembro" and mis-parse every December value.
  const parts = /^([\s\S]*)de([\s\S]*)$/i.exec(competenceDescription || '');

  if (!parts) {
    return '';
  }

  const monthDescriptionNormalized = parts[1].toLocaleLowerCase('en-US').trim();
  const year = parts[2].trim();

  const month = dates.find(
    date => date.description.toLocaleLowerCase('en-US').trim() === monthDescriptionNormalized,
  )?.value;

  // Without this guard an unknown month would be rendered as "ed" (the tail of "00undefined").
  if (month === undefined || !year) {
    return '';
  }

  const monthFormatted = `00${month}`.slice(-2);

  return `${monthFormatted}/${year}`;
};

/**
 * Rewrites a Brazilian "DD/MM/YYYY" date as "YYYY-MM-DD".
 *
 * @param brazilianDate Slash-separated date; may be empty or missing at runtime.
 * @returns The reordered date, or '' when any of the first three parts is absent.
 */
export const brazilianDateToEnglish = (brazilianDate: string): string => {
  const parts = (brazilianDate || '').split('/');
  const day = parts[0];
  const month = parts[1];
  const year = parts[2];

  if (!day || !month || !year) {
    return '';
  }

  return [year, month, day].join('-');
};

/**
 * Parses a Brazilian-formatted amount ("1.234,56") into a number (1234.56).
 *
 * Every "." (thousands separator) is dropped and the first "," becomes the
 * decimal point before the text is handed to Number().
 *
 * @param money Formatted amount; may be empty or missing at runtime.
 * @returns The parsed number, 0 for missing/empty input, or NaN when the
 *          remaining text is not numeric.
 */
export const brazilianMoneyToNumber = (money: string): number => {
  if (money === null || money === undefined) {
    return 0;
  }

  const withoutThousands = money.replace(/\./gm, '');
  const normalized = withoutThousands.replace(/,/, '.');

  if (!normalized) {
    return 0;
  }

  return Number(normalized);
};

export default {};

+ 39
- 0
src/utils/regex/regexMapaFolha.ts Целия файл

@@ -0,0 +1,39 @@
/**
 * Regular expressions for pulling fields out of "Mapa da Folha" report text
 * (presumably the text extracted from the report PDF — confirm with callers).
 *
 * Each entry is a factory: every call evaluates the literal again and returns a
 * new RegExp object, so the `lastIndex` state carried by the `g` flag is not
 * shared between callers. Values are exposed through named capture groups.
 */
const regexMapaFolha = {
// Text between "Empresa :" and the parenthesised number that precedes "Página".
companyName: (): RegExp => /Empresa\s*:\s*(?<companyName>.*?)\s*\(\s*(\d{1,})\s*\)\s*Página/gm,

// The parenthesised number on the same "Empresa : ... ( N ) Página" line.
companyCode: (): RegExp => /Empresa\s*:\s*.*?\s*\(\s*(?<companyCode>\d{1,})\s*\)\s*Página/gm,

// Text between "End. :" and "CNPJ/CEI"; `.` does not cross line breaks here.
companyAddress: (): RegExp => /End\.\s*:\s*(?<companyAddress>.*?)CNPJ\/CEI/gm,

/**
 * A long address can push the CNPJ onto another line, so this takes any run of
 * 14 digits found between "CNPJ/CEI" and "Ref.:". So far no report has had the
 * CNPJ itself split in the middle.
 */
companyCNPJ: (): RegExp => /CNPJ\/CEI:((.|\n)*?)(?<companyCNPJ>\d{14})((.|\n)*?)Ref\.:/gm,

// Start and end dates (DD/MM/YYYY) of the "Ref.: <start> a <end>" period.
reportReference: (): RegExp =>
/Ref\.:\s*(?<refStartDate>\d{2}\/\d{2}\/\d{4})\s*a\s*(?<refEndDate>\d{2}\/\d{2}\/\d{4})/gm,

// Everything after the line containing the "Ref.:" period, to the end of the input.
contentEmployePage: (): RegExp =>
/Ref\.:\s*\d{2}\/\d{2}\/\d{4}\s*a\s*\d{2}\/\d{2}\/\d{4}(.*?)\n{1,}(?<contentEmployePage>(.|\n)*)/gm,

// One employee record: leading code, name, salary in "1.234,56" form, the text after
// "Função :" and the remainder from "Livro" up to the end of the "Base INSS:" line.
employeeInfo: (): RegExp =>
/^(?<employeeCode>\d{1,})\s*(?<employeeName>.*?)(?<employeeSalary>(\.?\d{1,}){1,},\d{2})\s*Função\s*:(?<employeeRole>(.|\n)*?)(?<employeeContentRest>Livro(.|\n)*?Base INSS:.*$)/gm,

// Date (DD/MM/YYYY) following "Admissão :".
employeeAdmissionDate: (): RegExp =>
/Admissão\s*:\s*(?<employeeAdmissionDate>\d{2}\/\d{2}\/\d{4})/gm,

// Lines after the "Admissão :" line, stopping before the line that precedes a row of 5+ asterisks.
employeeEvents: (): RegExp => /Admissão\s*:(.*?)\n(?<events>(.|\n)*?)(.*)\n\s*\*{5,}/gm,

// Text found between the first two runs of 2+ whitespace characters.
// NOTE(review): `g` only (no `m`) — presumably applied to one event line at a time; confirm.
eventReference: (): RegExp => /.*?\s{2,}(?<eventReference>.*?)\s{2,}.*/g,

// Leading numeric code and the description up to the first double whitespace.
// NOTE(review): without the `m` flag `^` anchors to the start of the input only.
eventInfo: (): RegExp => /^(?<eventCode>\d{1,})\s*(?<eventDescription>(.*?))\s{2}/g,

// "Base INSS:<base>(Aliq.:<aliquot>%)".
inss: (): RegExp => /Base\sINSS:(?<baseINSS>.*?)\(Aliq\.:(?<aliquotINSS>(.*?))%\)/gm,

// "Base FGTS:<base>(Valor:<value>)".
fgts: (): RegExp => /Base\sFGTS:(?<baseFGTS>.*?)\(Valor:(?<valueFGTS>(.*?))\)/gm,

// Up to 15 characters following "Base IRRF Folha: ".
baseIRRF: (): RegExp => /Base\sIRRF\sFolha:\s(?<baseIRRF>.{1,15})/gm,
};

export default regexMapaFolha;

+ 16
- 0
tsconfig.json Целия файл

@@ -0,0 +1,16 @@
{
"compilerOptions": {
"target": "es5",
"module": "commonjs",
"outDir": "./dist",
"strict": true,
"strictPropertyInitialization": false,
"resolveJsonModule": true,
"esModuleInterop": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"allowJs": true
}
}

+ 6935
- 0
yarn.lock
Файловите разлики са ограничени, защото са твърде много
Целия файл


Зареждане…
Отказ
Запис