MatchAnalyzer.cs
1 //
2 // Copyright (c) Microsoft. All rights reserved.
3 // Licensed under the MIT license.
4 //
5 // Microsoft Bot Framework: http://botframework.com
6 //
7 // Bot Builder SDK GitHub:
8 // https://github.com/Microsoft/BotBuilder
9 //
10 // Copyright (c) Microsoft Corporation
11 // All rights reserved.
12 //
13 // MIT License:
14 // Permission is hereby granted, free of charge, to any person obtaining
15 // a copy of this software and associated documentation files (the
16 // "Software"), to deal in the Software without restriction, including
17 // without limitation the rights to use, copy, modify, merge, publish,
18 // distribute, sublicense, and/or sell copies of the Software, and to
19 // permit persons to whom the Software is furnished to do so, subject to
20 // the following conditions:
21 //
22 // The above copyright notice and this permission notice shall be
23 // included in all copies or substantial portions of the Software.
24 //
25 // THE SOFTWARE IS PROVIDED ""AS IS"", WITHOUT WARRANTY OF ANY KIND,
26 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
29 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32 //
33 
34 using System;
35 using System.Collections.Generic;
36 using System.Linq;
37 
38 namespace Microsoft.Bot.Builder.FormFlow.Advanced
39 {
40  internal class MatchAnalyzer
41  {
42  internal static void PrintMatches(IEnumerable<TermMatch> matches, int offset = 0)
43  {
44  foreach (var match in matches)
45  {
46  string message = "";
47  message = message.PadRight(match.Start + offset, ' ');
48  message = message.PadRight(match.End + offset, '_');
49  Console.WriteLine("{0} {1}", message, match.Value);
50  }
51  }
52 
53  internal static bool IsIgnorable(string input, int start, int end)
54  {
55  return Language.NonWord(input.Substring(start, end - start));
56  }
57 
58  internal static bool IsSpecial(object value)
59  {
60  return value is SpecialValues && (SpecialValues)value == SpecialValues.Field;
61  }
62 
63  // Collapse together subsequent matches for same value or value has same range as no preference
64  internal static IEnumerable<TermMatch> Coalesce(IEnumerable<TermMatch> matches, string input)
65  {
66  var sorted = (from match in matches
67  orderby match.Start ascending
68  , match.End ascending
69  , match.Value == null ascending
70  select match).ToList();
71  while (sorted.Count() > 0)
72  {
73  var current = sorted.First();
74  sorted.Remove(current);
75  bool emit = true;
76  foreach (var next in sorted.ToList())
77  {
78  if (next.Covers(current))
79  {
80  // Current is completely covered by a subsequent match
81  emit = false;
82  break;
83  }
84  else if (current.End < next.Start)
85  {
86  var gap = next.Start - current.End;
87  if (gap > 1 && !Language.NonWord(input.Substring(current.End, gap)))
88  {
89  // Unmatched word means we can't merge any more
90  emit = true;
91  break;
92  }
93  else if (current.Value == next.Value || IsSpecial(current.Value) || IsSpecial(next.Value))
94  {
95  // Compatible, extend current match
96  current = new TermMatch(current.Start, next.End - current.Start, Math.Max(current.Confidence, next.Confidence),
97  IsSpecial(current.Value) ? next.Value : current.Value);
98  sorted.Remove(next);
99  }
100  }
101  else if (next.Value == null && current.Overlaps(next))
102  {
103  // Remove no preference if there is any overlapping meaning
104  sorted.Remove(next);
105  }
106  }
107  if (emit && !IsSpecial(current.Value))
108  {
109  sorted = (from match in sorted where !current.Covers(match) select match).ToList();
110  yield return current;
111  }
112  }
113  }
114 
115  internal static IEnumerable<TermMatch> HighestConfidence(IEnumerable<TermMatch> matches)
116  {
117  var sorted = (from match in matches orderby match.Start ascending, match.End ascending, match.Confidence descending select match);
118  TermMatch last = null;
119  foreach (var match in sorted)
120  {
121  if (last == null || !last.Same(match) || last.Confidence == match.Confidence)
122  {
123  last = match;
124  yield return match;
125  }
126  }
127  }
128 
129  // Full match if everything left is white space or punctuation
130  internal static bool IsFullMatch(string input, IEnumerable<TermMatch> matches, double threshold = 1.0)
131  {
132  bool fullMatch = matches.Count() > 0;
133  var sorted = from match in matches orderby match.Start ascending select match;
134  var current = 0;
135  var minConfidence = 1.0;
136  foreach (var match in sorted)
137  {
138  if (match.Start > current)
139  {
140  if (!IsIgnorable(input, current, match.Start))
141  {
142  fullMatch = false;
143  break;
144  }
145  }
146  if (match.Confidence < minConfidence)
147  {
148  minConfidence = match.Confidence;
149  }
150  current = match.End;
151  }
152  if (fullMatch && current < input.Length)
153  {
154  fullMatch = IsIgnorable(input, current, input.Length);
155  }
156  return fullMatch && minConfidence >= threshold;
157  }
158 
159  internal static IEnumerable<string> Unmatched(string input, IEnumerable<TermMatch> matches)
160  {
161  var unmatched = new List<string>();
162  var sorted = from match in matches orderby match.Start ascending select match;
163  var current = 0;
164  foreach (var match in sorted)
165  {
166  if (match.Start > current)
167  {
168  if (!IsIgnorable(input, current, match.Start))
169  {
170  yield return input.Substring(current, match.Start - current).Trim();
171  }
172  }
173  current = match.End;
174  }
175  if (input.Length > current)
176  {
177  yield return input.Substring(current).Trim();
178  }
179  }
180 
181  internal static double MinConfidence(IEnumerable<TermMatch> matches)
182  {
183  return matches.Count() == 0 ? 0.0 : (from match in matches select match.Confidence).Min();
184  }
185 
186  internal static int Coverage(IEnumerable<TermMatch> matches)
187  {
188  // TODO: This does not handle partial overlaps
189  return matches.Count() == 0 ? 0 : (from match in GroupedMatches(matches) select match.First().Length).Sum();
190  }
191 
192  internal static int BestMatches(params IEnumerable<TermMatch>[] allMatches)
193  {
194  int bestMatch = 0;
195  var confidences = (from matches in allMatches select MinConfidence(matches)).ToArray();
196  int bestCoverage = 0;
197  double bestConfidence = 0;
198  for (var i = 0; i < allMatches.Length; ++i)
199  {
200  var confidence = confidences[i];
201  var coverage = Coverage(allMatches[i]);
202  if (coverage > bestCoverage)
203  {
204  bestConfidence = confidence;
205  bestCoverage = coverage;
206  bestMatch = i;
207  }
208  else if (coverage == bestCoverage && confidence > bestConfidence)
209  {
210  bestConfidence = confidence;
211  bestCoverage = coverage;
212  bestMatch = i;
213  }
214  }
215  return bestMatch;
216  }
217 
218  internal static List<List<TermMatch>> GroupedMatches(IEnumerable<TermMatch> matches)
219  {
220  var groups = new List<List<TermMatch>>();
221  var sorted = from match in matches orderby match.Start ascending, match.End descending select match;
222  var current = sorted.FirstOrDefault();
223  var currentGroup = new List<TermMatch>();
224  foreach (var match in sorted)
225  {
226  if (match != current)
227  {
228  if (current.Same(match))
229  {
230  // No preference loses to everything
231  if (current.Value == null)
232  {
233  current = match;
234  }
235  else if (match != null)
236  {
237  // Ambiguous match
238  currentGroup.Add(match);
239  }
240  }
241  else if (!current.Overlaps(match))
242  // TODO: We are not really handling partial overlap. To do so we need a lattice.
243  {
244  // New group
245  currentGroup.Add(current);
246  groups.Add(currentGroup);
247  current = match;
248  currentGroup = new List<TermMatch>();
249  }
250  }
251  }
252  if (current != null)
253  {
254  currentGroup.Add(current);
255  groups.Add(currentGroup);
256  }
257  return groups;
258  }
259  }
260 }
SpecialValues
Enumeration of special kinds of matches.
Definition: IRecognize.cs:42