ReadFramework
FormAnalysis.h
Go to the documentation of this file.
1 /*******************************************************************************************************
2  ReadFramework is the basis for modules developed at CVL/TU Wien for the EU project READ.
3 
4  Copyright (C) 2016 Markus Diem <diem@caa.tuwien.ac.at>
5  Copyright (C) 2016 Stefan Fiel <fiel@caa.tuwien.ac.at>
6  Copyright (C) 2016 Florian Kleber <kleber@caa.tuwien.ac.at>
7 
8  This file is part of ReadFramework.
9 
10  ReadFramework is free software: you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation, either version 3 of the License, or
13  (at your option) any later version.
14 
15  ReadFramework is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program. If not, see <http://www.gnu.org/licenses/>.
22 
23  The READ project has received funding from the European Union’s Horizon 2020
24  research and innovation programme under grant agreement No 674943
25 
26  related links:
27  [1] http://www.caa.tuwien.ac.at/cvl/
28  [2] https://transkribus.eu/Transkribus/
29  [3] https://github.com/TUWien/
30  [4] http://nomacs.org
31  *******************************************************************************************************/
32 
33 #pragma once
34 
35 #include "BaseModule.h"
36 #include "LineTrace.h"
37 #include "Elements.h"
38 #pragma warning(push, 0) // no warnings from includes
39 #include <QObject>
40 
41 #include <opencv2/core.hpp>
42 #pragma warning(pop)
43 
44 // TODO: add DllExport magic
45 
46 // Qt defines
47 
48 namespace rdf {
49 
51 
52  public:
54 
55  //double threshLineLenRation() const;
56  //void setThreshLineLenRation(double s);
57 
58  double distThreshold() const;
59  void setDistThreshold(double d);
60 
61  double errorThr() const;
62  void setErrorThr(double e);
63 
64  double variationThrLower() const;
65  void setVariationThrLower(double v);
66 
67  double variationThrUpper() const;
68  void setVariationThrUpper(double v);
69 
70  double coLinearityThr() const;
71  void setCoLinearityThr(double c);
72 
73  //int searchXOffset() const;
74  //int searchYOffset() const;
75 
76  bool saveChilds() const;
77  void setSaveChilds(bool c);
78 
79  QString templDatabase() const;
80  void setTemplDatabase(QString s);
81 
82  QString evalPath() const;
83  void setevalPath(QString s);
84 
85  QString toString() const override;
86 
87  private:
88  void load(const QSettings& settings) override;
89  void save(QSettings& settings) const override;
90 
91  //QString mTemplDatabase;
92  QString mTemplDatabase = QString("C:\\Users\\flo\\projects\\READ\\formTest\\form - gt\\Table_Template_M_Freyung_014_01\\page\\M_Freyung_014 - 01_0112.xml");
93  QString mEvalPath = QString("C:\\Users\\flo\\projects\\READ\\formTest\\form - gt\\Table_Template_M_Freyung_014_01\\page\\");
94 
95  //double mThreshLineLenRatio = 0.6;
96  //double mDistThreshold = 30.0;
97  double mDistThreshold = 200.0; //threshold is set dynamically - fallback value to find line candidates within mDistThreshold
98  double mColinearityThreshold = 20; //up to which distance a line is colinear
99  double mErrorThr = 15.0; //currently not used
100  double mVariationThrLower = 0.2; //allowed variation for width/height of cells in % (lower bound)
101  double mVariationThrUpper = 0.3; //allowed variation for width/height of cells in % (upper bound)
102 
103  bool mSaveChilds = false;
104 
105  //int mSearchXOffset = 200;
106  //int mSearchYOffset = 200;
107 
108  };
109 
110 
112 
113  public:
114  //0: left, 1: right, 2: upper, 3: bottom
116  pos_left = 0,
119  pos_bottom
120  };
121 
122 
124 
125  void setLinePos(const AssociationGraphNode::LinePosition& type);
126  AssociationGraphNode::LinePosition linePosition() const;
127 
128  void setReferenceLine(Line l);
129  Line referenceLine() const;
130 
131  int getRowIdx() const;
132  int getColIdx() const;
133  void setLineCell(int rowIdx, int colIdx);
134 
135  int rowSpan() const;
136  int colSpan() const;
137  void setSpan(int rowSpan, int colSpan);
138 
139  void setMatchedLine(Line l);
140  void setMatchedLine(Line l, double overlap, double distance);
141  Line matchedLine() const;
142  double overlap() const;
143  double distance() const;
144 
145  void addBrokenLine(Line l, int lineIdx);
146  bool brokenLinesPresent() const;
147  QVector<Line> brokenLines() const;
148  QVector<int> brokenLinesIdx() const;
149 
150  void setMatchedLineIdx(int idx);
151  int matchedLineIdx() const;
152 
153  void setCellIdx(int idx);
154  int cellIdx() const;
155 
156  void setNeighbourCellIdx(QVector<int> n);
157  QVector<int> neighbourCellIDx();
158 
159  double weight();
160 
161  QVector<int> adjacencyNodes() const;
162  //QSet<int> adjacencyNodesSet() const;
163  //void createAdjacencyNodesSet();
164  void addAdjacencyNode(int idx);
165  bool testAdjacency(QSharedPointer<AssociationGraphNode> neighbour, double distThreshold = 20, double variationThrLower = 0.2, double variationThrUpper = 0.2);
166  void clearAdjacencyList();
167 
168  int degree() const;
169 
170  bool operator< (const AssociationGraphNode& node) const;
171  static bool compareNodes(const QSharedPointer<rdf::AssociationGraphNode> n1, const QSharedPointer<rdf::AssociationGraphNode> n2);
172 
173  protected:
174  int mCellIdx = 1;
175  QVector<int> mMergedNeighbourCells;
178  int mRefRowIdx = -1;
179  int mRefColIdx = -1;
180  int mRowSpan = 0;
181  int mColSpan = 0;
182 
183  //line that represents the current cell line
186 
187  //broken line pieces, but all have a shorter overlap compared to matchedLine
188  QVector<Line> mBrokenLines;
189  QVector<int> mBrokenLinesIdx;
190 
191  double mOverlap = -1;
192  double mDistance = -1;
193 
194  QVector<int> mAdjacencyNodesIdx;
195  QSet<int> mAn;
196 
197  };
198 
200 
201  public:
202 
203  FormEvaluation();
204  void setSize(cv::Size s);
205  bool setTemplate(QString templateName);
206  void setTable(QSharedPointer<rdf::TableRegion> table);
207  cv::Mat computeTableImage(QSharedPointer<rdf::TableRegion> table, bool mergeCells = false);
208  void computeEvalCells();
209  void computeEvalTableRegion();
210 
211  double tableJaccard();
212  double tableMatch();
213 
214  QVector<double> cellJaccards();
215  double meanCellJaccard();
216  QVector<double> cellMatches();
217  double meanCellMatch();
218 
219  double missedCells(double threshold = 0.2);
220  double underSegmented(double threshold = 0.2);
221  QVector<double> underSegmentedC();
222 
223  protected:
224 
225  QSharedPointer<rdf::TableRegion> mTableRegionTemplate;
226  QSharedPointer<rdf::TableRegion> mTableRegionMatched;
227 
228  cv::Size mImageSize;
229  cv::Mat mTableTemplate;
230  cv::Mat mTableMatched;
231 
232  double mCellsTemplate = -1;
233  double mCellsMatched = -1;
234 
236  double mMatchTable;
237 
238  QVector<double> mJaccardCell;
239  QVector<double> mCellMatch;
240  QVector<double> mUnderSegmented;
241 
242  };
243 
244 
245 
246 
248 
249  public:
250  FormFeatures();
251  FormFeatures(const cv::Mat& img, const cv::Mat& mask = cv::Mat());
252 
253  void setInputImg(const cv::Mat& img);
254  void setMask(const cv::Mat& mask);
255  bool isEmpty() const override;
256  bool compute() override;
257  bool computeBinaryInput();
258 
259  //old version
260  //bool loadTemplateDatabase(QString db);
261  //QVector<rdf::FormFeatures> templatesDb() const;
262  //bool compareWithTemplate(const FormFeatures& fTempl);
263  //cv::Mat getMatchedLineImg(const cv::Mat& srcImg, const Vector2D& offset = Vector2D(0, 0)) const;
264  //QVector<rdf::Line> horLinesMatched() const;
265  //QVector<rdf::Line> verLinesMatched() const;
266  bool readTemplate(QSharedPointer<rdf::FormFeatures> templateForm);
267  bool estimateRoughAlignment(bool useBinaryImg = false);
268  cv::Mat drawAlignment(cv::Mat img = cv::Mat());
269  cv::Mat drawMatchedForm(cv::Mat img = cv::Mat(), float t = 10.0);
270  cv::Mat drawLinesNotUsedForm(cv::Mat img = cv::Mat(), float t = 10.0);
271  cv::Mat drawLines(cv::Mat img = cv::Mat(), float t = 10.0);
272  cv::Mat drawMaxClique(cv::Mat img = cv::Mat(), float t = 10.0, int idx = 0);
273  cv::Mat drawMaxCliqueNeighbours(int cellIdx, AssociationGraphNode::LinePosition lp, int nodeCnt = -1, cv::Mat img = cv::Mat(), float t = 10.0);
274  QSharedPointer<rdf::TableRegion> tableRegion();
275  QSharedPointer<rdf::TableRegion> tableRegionTemplate();
276  QVector<QSharedPointer<rdf::TableCellRaw>> createRawTableFromTemplate();
277  void createAssociationGraphNodes(QVector<QSharedPointer<rdf::TableCellRaw>> cellsR);
278  void createReducedAssociationGraphNodes(QVector<QSharedPointer<rdf::TableCellRaw>> cellsR);
279  QVector<QSharedPointer<rdf::AssociationGraphNode>> mergeColinearNodes(QVector<QSharedPointer<rdf::AssociationGraphNode>> &tmpNodes);
280  void createAssociationGraph();
281  bool** adjacencyMatrix(const QVector<QSharedPointer<rdf::AssociationGraphNode>> &associationGraphNodes);
282  void findMaxCliques();
283  void createTableFromMaxClique(const QVector<QSharedPointer<rdf::TableCell>> &cells);
284  void createTableFromMaxCliqueReduced(const QVector<QSharedPointer<rdf::TableCell>> &cells);
285  //void plausibilityCheck();
286 
287  QVector<QSet<int>> getMaxCliqueHor() const;
288  QVector<QSet<int>> getMaxCliqueVer() const;
289 
290 
291  bool matchTemplate();
292  //QVector<QSharedPointer<rdf::TableCellRaw>> findLineCandidatesForCells(QVector<QSharedPointer<rdf::TableCellRaw>> cellR);
293  //rdf::Line findLine(rdf::Line l, double distThreshold, bool &found, bool horizontal = true);
294  rdf::LineCandidates findLineCandidates(rdf::Line l, double distThreshold, bool horizontal = true);
295  //0: left, 1: right, 2: upper, 3: bottom
296  double findMinWidth(QVector<QSharedPointer<rdf::TableCellRaw>> cellsR, int cellIdx, int neighbour);
297  rdf::Polygon createPolygon(rdf::Line tl, rdf::Line ll, rdf::Line rl, rdf::Line bl);
298  void createCellfromLineCandidates(QVector<QSharedPointer<rdf::TableCellRaw>> cellsR);
299 
300  bool isEmptyLines() const;
301  bool isEmptyTable() const;
302 
303  bool setTemplateName(QString s);
304  QString templateName() const;
305 
306  cv::Size sizeImg() const;
307  void setSize(cv::Size s);
308  QVector<rdf::Line> horLines() const;
309  void setHorLines(const QVector<rdf::Line>& h);
310  QVector<rdf::Line> verLines() const;
311  void setVerLines(const QVector<rdf::Line>& v);
312 
313  QVector<rdf::Line> usedHorLines() const;
314  QVector<rdf::Line> notUsedHorLines() const;
315  QVector<rdf::Line> filterHorLines(double minOverlap = 0.1, double distThreshold=20) const;
316  QVector<rdf::Line> useVerLines() const;
317  QVector<rdf::Line> notUseVerLines() const;
318  QVector<rdf::Line> filterVerLines(double minOverlap = 0.1, double distThreshold=20) const;
319 
320  double lineDistance(rdf::Line templateLine, rdf::Line formLine, double minOverlap = 0.1, bool horizontal = true);
321 
322  cv::Point offset() const;
323  double error() const;
324 
325  QSharedPointer<FormFeaturesConfig> config() const;
326  void setConfig(QSharedPointer<FormFeaturesConfig> c);
327 
328  cv::Mat binaryImage() const;
329  void setEstimateSkew(bool s);
330  //void setThreshLineLenRatio(float l);
331  //void setThresh(int thresh);
332  //int thresh() const;
333  QString toString() const override;
334 
335  void setFormName(QString s);
336  QString formName() const;
337 
338 
339  void setCells(QVector<QSharedPointer<rdf::TableCell>> c);
340  QVector<QSharedPointer<rdf::TableCell>> cells() const;
341  void setRegion(QSharedPointer<rdf::TableRegion> r);
342  QSharedPointer<rdf::TableRegion> region() const;
343  void setSeparators(QSharedPointer<rdf::Region> r);
344 
345  //QSharedPointer<rdf::TableCellRaw> getCellId(QVector<QSharedPointer<rdf::TableCellRaw>> cells, int id) const;
346 
347  protected:
348 
349  //old version
350  //float errLine(const cv::Mat& distImg, const rdf::Line l, cv::Point offset = cv::Point(0,0));
351  //void findOffsets(const QVector<Line>& hT, const QVector<Line>& vT, QVector<int>& offX, QVector<int>& offY) const;
352 
353 
354  private:
355  cv::Mat mSrcImg;
356  cv::Mat mMask;
357  cv::Mat mBwImg;
358  bool mEstimateSkew = false;
359  bool mPreFilter = true;
360  int preFilterArea = 10;
361  double mPageAngle = 0.0;
362  double mMinError = std::numeric_limits<double>::max();
363 
364  QVector<rdf::Line> mHorLines;
365  QVector<int> mUsedHorLineIdx;
366  QVector<rdf::Line> mVerLines;
367  QVector<int> mUsedVerLineIdx;
368 
369  QVector<QSharedPointer<rdf::AssociationGraphNode>> mANodesHorizontal;
370  QVector<QSharedPointer<rdf::AssociationGraphNode>> mANodesVertical;
371  //int mMinGraphSizeHor = 0;
372  //int mMinGraphSizeVer = 0;
373  //QVector<QSharedPointer<rdf::AssociationGraphNode>> testNodes;
374 
375  QVector<QSet<int>> mMaxCliquesHor;
376  QVector<QSet<int>> mMaxCliquesVer;
377 
378  //rdf::FormFeatures mTemplateForm;
379  QSharedPointer<rdf::FormFeatures> mTemplateForm;
380 
381  //QVector<rdf::Line> mHorLinesMatched;
382  //QVector<rdf::Line> mVerLinesMatched;
383  cv::Point mOffset = cv::Point(0,0);
384  cv::Size mSizeSrc;
385 
386  // parameters
387 
388  bool checkInput() const override;
389  QString mFormName;
390  QString mTemplateName;
391 
392  QVector<QSharedPointer<rdf::TableCell>> mCells;
393  QSharedPointer<rdf::TableRegion> mRegion;
394 
395  //only for drawing
396  QVector<QSharedPointer<rdf::TableCellRaw>> mCellsR;
397  //void load(const QSettings& settings) override;
398  //void save(QSettings& settings) const override;
399  };
400 
401 }
cv::Size mImageSize
Definition: FormAnalysis.h:228
Line mReferenceLine
Definition: FormAnalysis.h:176
Definition: BaseModule.h:63
QVector< int > mBrokenLinesIdx
Definition: FormAnalysis.h:189
#define DllCoreExport
Definition: BaseImageElement.h:43
double mMatchTable
Definition: FormAnalysis.h:236
LinePosition mLinePos
Definition: FormAnalysis.h:177
QVector< int > mAdjacencyNodesIdx
Definition: FormAnalysis.h:194
Line mMatchedLine
Definition: FormAnalysis.h:184
Definition: Shapes.h:532
Definition: FormAnalysis.h:247
QSharedPointer< rdf::TableRegion > mTableRegionMatched
Definition: FormAnalysis.h:226
QSharedPointer< rdf::TableRegion > mTableRegionTemplate
Definition: FormAnalysis.h:225
LinePosition
Definition: FormAnalysis.h:115
QSet< int > mAn
Definition: FormAnalysis.h:195
Definition: FormAnalysis.h:199
bool operator<(const PixelEdge &pe1, const PixelEdge &pe2)
Definition: Pixel.cpp:537
config
Definition: DependencyCollector.py:271
A basic line class including stroke width (thickness).
Definition: Shapes.h:68
QVector< double > mCellMatch
Definition: FormAnalysis.h:239
Definition: FormAnalysis.h:50
DllCoreExport bool save(const QImage &img, const QString &savePath, int compression=-1)
Saves the specified QImage img.
Definition: Image.cpp:180
Definition: Shapes.h:493
int mMatchedLineIdx
Definition: FormAnalysis.h:185
DllCoreExport QImage load(const QString &path, bool *ok=0)
Definition: Image.cpp:152
cv::Mat mTableMatched
Definition: FormAnalysis.h:230
double mJaccardTable
Definition: FormAnalysis.h:235
Definition: FormAnalysis.h:117
QVector< double > mJaccardCell
Definition: FormAnalysis.h:238
QVector< int > mMergedNeighbourCells
Definition: FormAnalysis.h:175
QVector< Line > mBrokenLines
Definition: FormAnalysis.h:188
This is the base class for all modules. It provides all functions which are implemented by the module...
Definition: BaseModule.h:126
Definition: Algorithms.cpp:45
Definition: FormAnalysis.h:111
cv::Mat mTableTemplate
Definition: FormAnalysis.h:229
Definition: FormAnalysis.h:118
QVector< double > mUnderSegmented
Definition: FormAnalysis.h:240