From Finite Automata to Regular Languages - Implementation Part

2020-05-12 5561 words 12 minutes

This article can be seen as “from infinite automata to regular expressions” of this additional article, mainly to provide JAVA to achieve from DFA to RE conversion. Because the code is copied from the assignment, I don’t know if there will be any copyright problem, but this is not considered commercial use, and some of the code is written by me, so it should not be a big problem. (The assignment will give some source code of Java and Haskell by default, I don’t want to write Haskell so I will use Java to implement it, interested parties can try it by themselves after copying the code)

Let’s first look at the implementation of regular expressions, where there is an abstract class of Regex, corresponding to the definition of regular expressions in another blog post, with six subclasses to serve as a basis for generalization, namely $\epsilon , \emptyset$, single-letter, concatenated, and, asterisk, and here is the code.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737



import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;

public abstract class Regex {

	/**
	 * Determines whether the given string matches the regular expression.
	 * @param word the string
	 * @return
	 */
	public abstract boolean matches(String word);

	private static final class ReadOnlyIterator<T> implements Iterator<T> {

		private final Iterator<T> it;

		public ReadOnlyIterator(Iterator<T> it) {
			this.it = it;
		}

		@Override
		public boolean hasNext() {
			return it.hasNext();
		}

		@Override
		public T next() {
			return it.next();
		}

	}

	public static final class Empty extends Regex {

		@Override
		public boolean matches(String word) {
			return false;
		}

		@Override
		protected void appendTo(StringBuilder sb, int prec) {
	      sb.append("{}");
		}

		private Empty() {
		}

		@Override
		protected int getPrecedence() {
			return 4;
		}

		@Override
		public int hashCode() {
			return 23;
		}

		@Override
		public boolean equals(Object obj) {
			if (this == obj)
				return true;
			if (obj == null)
				return false;
			return (getClass() == obj.getClass());
		}

		public int size() {
			return 1;
		}

		@Override
		public boolean isNullable() {
			return false;
		}

		@Override
		public <R> R accept(Visitor<R> vis) {
			return vis.visitEmpty();
		}

	}

	public static final class Epsilon extends Regex {
		@Override
		public boolean matches(String word) {
			return word.isEmpty();
		}

		private Epsilon() {
		}

		@Override
		protected void appendTo(StringBuilder sb, int prec) {
	      sb.append("()");
		}

		@Override
		protected int getPrecedence() {
			return 4;
		}

		@Override
		public int hashCode() {
			return 47;
		}

		@Override
		public boolean equals(Object obj) {
			if (this == obj)
				return true;
			if (obj == null)
				return false;
			return (getClass() == obj.getClass());
		}

		public int size() {
			return 1;
		}

		@Override
		public boolean isNullable() {
			return true;
		}

		@Override
		public <R> R accept(Visitor<R> vis) {
			return vis.visitEpsilon();
		}

	}

	public static final class Single extends Regex {

		private final char c;

		@Override
		public boolean matches(String word) {
			return word.length() == 1 && word.charAt(0) == c;
		}

		@Override
		protected int getPrecedence() {
			return 4;
		}

		public char getChar() {
			return c;
		}

		private Single(char c) {
			super();
			this.c = c;
		}

		@Override
		protected void appendTo(StringBuilder sb, int prec) {
			sb.append(c);
		}

		@Override
		public int hashCode() {
			final int prime = 31;
			int result = 1;
			result = prime * result + c;
			return result;
		}

		@Override
		public boolean equals(Object obj) {
			if (this == obj)
				return true;
			if (obj == null)
				return false;
			if (getClass() != obj.getClass())
				return false;
			Single other = (Single) obj;
			if (c != other.c)
				return false;
			return true;
		}

		public int size() {
			return 1;
		}

		@Override
		public boolean isNullable() {
			return false;
		}

		@Override
		public <R> R accept(Visitor<R> vis) {
			return vis.visitSingle(c);
		}

	}

	public static final class Concat extends Regex implements Iterable<Regex> {

		private final List<Regex> children;
		private final int size;
		private final boolean nullable;

		private Concat(List<Regex> children) {
			super();
			List<Regex> children2 = new LinkedList<>();
			int size = 1;
			boolean nullable = true;
			for (Regex r : children) {
				if (r instanceof Epsilon) continue;
				children2.add(r);
				size += r.size();
				nullable = nullable && r.isNullable();
			}
			this.size = size;
			this.nullable = nullable;
			this.children = Collections.unmodifiableList(children2);
		}

		@Override
		public boolean matches(String word) {
			for (int i = 0; i <= word.length(); ++i) {
				Regex concatRest = Regex.epsilon().concat(children.subList(1, children.size()));
				if (children.get(0).matches(word.substring(0, i)) && concatRest.matches(word.substring(i))
				) {
					return true;
				}
			}
			return false;
		}

		public List<Regex> getChildren() {
			return children;
		}

		@Override
		protected void appendTo(StringBuilder sb, int prec) {
			for (Regex r : children)
				r.toStringAux(sb, getPrecedence());			
		}

		@Override
		public int hashCode() {
			final int prime = 31;
			int result = 1;
			result = prime * result + ((children == null) ? 0 : children.hashCode());
			result = prime * result + size;
			return result;
		}

		@Override
		public boolean equals(Object obj) {
			if (this == obj)
				return true;
			if (obj == null)
				return false;
			if (getClass() != obj.getClass())
				return false;
			Concat other = (Concat) obj;
			if (children == null) {
				if (other.children != null)
					return false;
			} else if (!children.equals(other.children))
				return false;
			if (size != other.size)
				return false;
			return true;
		}

		@Override
		protected int getPrecedence() {
			return 1;
		}

		public int size() {
			return size;
		}

		@Override
		public boolean isNullable() {
			return nullable;
		}

		@Override
		public Iterator<Regex> iterator() {
			return new ReadOnlyIterator<Regex>(children.iterator());
		}

		public Stream<Regex> stream() {
			return children.stream();
		}

		@Override
		public <R> R accept(Visitor<R> vis) {
			return vis.visitConcat(children);
		}

	}

	public static final class Alternative extends Regex implements Iterable<Regex> {

		private final Set<Regex> children;
		private final int size;
		private final boolean nullable;

		private Alternative(Iterable<Regex> children) {
			super();
			Set<Regex> children2 = new HashSet<Regex>();
			int size = 1;
			boolean nullable = false;
			for (Regex r : children) {
				if (r instanceof Empty) continue;
				children2.add(r);
				size += r.size();
				nullable = nullable || r.isNullable();
			}
			this.children = Collections.unmodifiableSet(children2);
			this.size = size;
			this.nullable = nullable;
		}

		@Override
		public boolean matches(String word) {
			return children.stream().anyMatch(r -> r.matches(word));
		}


		public Set<Regex> getChildren() {
			return children;
		}

		@Override
		protected void appendTo(StringBuilder sb, int prec) {
			boolean first = true;
			for (Regex r : children) {
				if (!first) {
					sb.append("|");
				} else {
					first = false;
				}
				r.toStringAux(sb, getPrecedence());			
			}
		}

		@Override
		protected int getPrecedence() {
			return 0;
		}

		public int size() {
			return size;
		}

		@Override
		public int hashCode() {
			final int prime = 31;
			int result = 1;
			result = prime * result + ((children == null) ? 0 : children.hashCode());
			result = prime * result + size;
			return result;
		}

		@Override
		public boolean equals(Object obj) {
			if (this == obj)
				return true;
			if (obj == null)
				return false;
			if (getClass() != obj.getClass())
				return false;
			Alternative other = (Alternative) obj;
			if (children == null) {
				if (other.children != null)
					return false;
			} else if (!children.equals(other.children))
				return false;
			if (size != other.size)
				return false;
			return true;
		}

		@Override
		public boolean isNullable() {
			return nullable;
		}

		@Override
		public Iterator<Regex> iterator() {
			return new ReadOnlyIterator<Regex>(children.iterator());
		}

		public Stream<Regex> stream() {
			return children.stream();
		}

		@Override
		public <R> R accept(Visitor<R> vis) {
			return vis.visitAlternative(children);
		}

	}

	public static final class Star extends Regex {

		private final Regex r;
		private final int size;

		public Regex getRegex() {
			return r;
		}

		private Star(Regex r) {
			super();
			this.r = r;
			this.size = r.size() + 1;
		}

		@Override
		public boolean matches(String word) {
			if (word.isEmpty()) {
				return true;
			} else {
				for (int i = 1; i <= word.length(); ++i) {
					if (r.matches(word.substring(0, i)) && this.matches(word.substring(i))
					) {
						return true;
					}
				}
				return false;
			}
		}

		@Override
		protected int getPrecedence() {
			return 2;
		}

		@Override
		protected void appendTo(StringBuilder sb, int prec) {
			r.toStringAux(sb, getPrecedence());
			sb.append('*');
		}

		@Override
		public int hashCode() {
			final int prime = 31;
			int result = 1;
			result = prime * result + ((r == null) ? 0 : r.hashCode());
			return result;
		}

		@Override
		public boolean equals(Object obj) {
			if (this == obj)
				return true;
			if (obj == null)
				return false;
			if (getClass() != obj.getClass())
				return false;
			Star other = (Star) obj;
			if (r == null) {
				if (other.r != null)
					return false;
			} else if (!r.equals(other.r))
				return false;
			return true;
		}

		public int size() {
			return size;
		}

		@Override
		public boolean isNullable() {
			return true;
		}

		@Override
		public <R> R accept(Visitor<R> vis) {
			return vis.visitStar(r);
		}

	}

	/**
	 * Visitor interface for regular expressions, with result type R.
	 */
	public static interface Visitor<R> {
		public R visitEmpty();
		public R visitEpsilon();
		public R visitSingle(char c);
		public R visitAlternative(Set<Regex> rs);
		public R visitConcat(List<Regex> rs);
		public R visitStar(Regex r);
	}


	protected abstract void appendTo(StringBuilder sb, int prec);

	protected abstract int getPrecedence();

	public abstract <R> R accept(Visitor<R> vis);

	protected final void toStringAux(StringBuilder sb, int prec) {
		if (prec > getPrecedence()) {
			sb.append("(");
			appendTo(sb, getPrecedence());
			sb.append(")");
		} else {
			appendTo(sb, getPrecedence());
		}
	}

	public String toString() {
		StringBuilder sb = new StringBuilder();
		toStringAux(sb, 0);
		return sb.toString();
	}



	// Smart constructors
	private static final Empty EMPTY = new Empty();
	private static final Epsilon EPSILON = new Epsilon();
	private static final Map<Character, Single> singleInsts = new HashMap<>();

	public static final Regex empty() {
		return EMPTY;
	}

	public static final Regex epsilon() {
		return EPSILON;
	}

	public static final Regex single(char c) {
		Single r = singleInsts.get(c);
		if (r == null) {
			r = new Single(c);
			singleInsts.put(c, r);
		}
		return r;
	}

	public final Regex alternative(Iterator<Regex> it) {
		Set<Regex> s = new HashSet<Regex>();

		boolean nullable = false, hasEpsilon = false;
		Regex r = this;

		do {
			if (r instanceof Epsilon) {
				hasEpsilon = true;
			} else {
				nullable = nullable || r.isNullable();
				if (r instanceof Alternative) {
					for (Regex r2 : (Alternative) r)
						s.add(r2);
				} else {
					if (!(r instanceof Empty)) s.add(r);
				}
			}
			r = (it.hasNext()) ? it.next() : null;
		} while (r != null);

		if (hasEpsilon && !nullable)
			s.add(Regex.epsilon());

		if (s.size() == 0)
			return empty();
		else if (s.size() == 1)
			return s.iterator().next();
		else
			return new Alternative(s);
	}

	public final Regex alternative(Iterable<Regex> rs) {
		return alternative(rs.iterator());
	}

	public final Regex alternative(Stream<Regex> rs) {
		return alternative(rs.iterator());
	}

	public final Regex alternative(Regex... rs) {
		return alternative(Arrays.stream(rs));
	}

	public static final Regex alternatives(Iterator<Regex> it) {
		if (it.hasNext())
			return it.next().alternative(it);
		else
			return Regex.empty();
	}

	public static final Regex alternatives(Iterable<Regex> rs) {
		return alternatives(rs.iterator());
	}

	public static final Regex alternatives(Stream<Regex> rs) {
		return alternatives(rs.iterator());
	}

	public static final Regex alternatives(Regex... rs) {
		return alternatives(Arrays.stream(rs));
	}


	public final Regex concat(Iterator<Regex> it) {
		List<Regex> l = new LinkedList<Regex>();
		Regex r = this;
		do {
			if (r instanceof Empty) {
				return empty();
			} else if (r instanceof Concat) {
				for (Regex r2 : (Concat) r)
					l.add(r2);
			} else {
				if (!(r instanceof Epsilon)) l.add(r);
			}
			r = (it.hasNext()) ? it.next() : null;
		} while (r != null);

		if (l.size() == 0)
			return epsilon();
		else if (l.size() == 1)
			return l.iterator().next();
		else
			return new Concat(l);
	}

	public final Regex concat(Stream<Regex> rs) {
		return concat(rs.iterator());
	}

	public final Regex concat(Iterable<Regex> rs) {
		return concat(rs.iterator());
	}

	public final Regex concat(Regex... rs) {
		return concat(Arrays.stream(rs));
	}

	public static final Regex concats(Iterator<Regex> it) {
		if (it.hasNext())
			return it.next().concat(it);
		else
			return Regex.epsilon();
	}

	public static final Regex concats(Stream<Regex> rs) {
		return concats(rs.iterator());
	}

	public static final Regex concats(Iterable<Regex> rs) {
		return concats(rs.iterator());
	}

	public static final Regex concats(Regex... rs) {
		return concats(Arrays.stream(rs));
	}


	public static final Regex star(Regex r) {
		if (r instanceof Star)
			return r;
		else if (r instanceof Empty || r instanceof Epsilon)
			return epsilon();
		else
			return new Star(r);
	}

	public Regex star() {
		return Regex.star(this);
	}


	// Utility functions
	public abstract int size();

	/**
	 * Returns true iff the regular expression generates the empty word.
	 */
	public abstract boolean isNullable();

	public static final Regex parse(String s) {
		return (new RegexParser(s)).parse();
	}

	public Set<Character> getAlphabet() {
		final Set<Character> alphabet = new HashSet<Character>();
		var visitor = new Visitor<Void>() {
			public Void visitEmpty() {
				return null;
			}

			public Void visitEpsilon() {
				return null;
			}

			public Void visitSingle(char c) {
				alphabet.add(c);
				return null;
			}

			@Override
			public Void visitAlternative(Set<Regex> rs) {
				rs.stream().forEach(r -> r.accept(this));
				return null;
			}

			@Override
			public Void visitConcat(List<Regex> rs) {
				rs.stream().forEach(r -> r.accept(this));
				return null;
			}

			@Override
			public Void visitStar(Regex r) {
				r.accept(this);
				return null;
			}
		};
		accept(visitor);
		return alphabet;
	}

}

Well, now that you have the basis of regular expressions, write another parser to recognize regular expressions.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124


import java.util.LinkedList;
import java.util.List;

public class RegexParser {

	private static final char EOF = 0;
	char[] input;
	int pos;

	private boolean eof() {
		return pos >= input.length;
	}

	private char peek() {
		while (pos < input.length && Character.isWhitespace(input[pos]))
			pos++;
		if (pos >= input.length)
			return EOF;
		else
			return input[pos];
	}

	private String prettyPeek() {
		char c = peek();
		if (c == EOF)
			return "end of input";
		else
			return "'" + c + "'";
	}

	@SuppressWarnings("unused")
	private RegexParser() {
	}

	public RegexParser(String s) {
		input = s.toCharArray();
	}

	public static final boolean isSymbol(char c) {
		return Character.isAlphabetic(c) || Character.isDigit(c);
	}

	public Regex parse() {
		Regex r = parseAlternative();
		if (!eof()) {
			throw new IllegalArgumentException(
					String.format("Expected end of input, but got %s instead at position %d.", prettyPeek(), pos));
		}
		return r;
	}

	private Regex parseAlternative() {
		List<Regex> l = new LinkedList<Regex>();
		l.add(parseNonAlternative());

		char c = peek();
		while (c == '|') {
			pos++;
		    l.add(parseNonAlternative());
			c = peek();
		}

		if (c != EOF && c != ')') {
			throw new IllegalArgumentException(
					String.format("Expected '|' or end of input, but got %s instead at position %d.", prettyPeek(), pos));
		}

		return Regex.alternatives(l);
	}

	private Regex parseNonAlternative() {
		char c = peek();

		if (c == EOF || c == ')' || c == '|')
			return Regex.epsilon();

		List<Regex> l = new LinkedList<Regex>();
		do {
			Regex r;
			if (c == '{') {
				pos++;
				if (peek() != '}') {
					throw new IllegalArgumentException(
							String.format("Expected '}', but got %s instead at position %d.", prettyPeek(), pos));
				}
				pos++;
				r = Regex.empty();
			} else if (c == '∅') {
				r = Regex.empty();
				pos++;
			} else if (c == 'ε' || c == '*') {
				// == '*' because of "(*)", which corresponds to "(ε*)"
				r = Regex.epsilon();
				pos++;
			} else if (c == '(') {
				pos++;
				r = parseAlternative();
				if (peek() != ')') {
					throw new IllegalArgumentException(
							String.format("Expected ')', but got %s instead at position %d.", prettyPeek(), pos));
				}
				pos++;
			} else if (isSymbol(c)) {
				r = Regex.single(c);
				pos++;
			} else {
				throw new IllegalArgumentException(String.format(
						"Expected alphabet symbol or one of '(', '|', ')', '*', but got %s instead at position %d.", prettyPeek(), pos));
			}

			while (peek() == '*') {
				r = Regex.star(r);
				pos++;
			}

			l.add(r);
			c = peek();
		} while (c != EOF && c != ')' && c != '|');

		return Regex.concats(l);
	}

}

With the RE part written, let us now implement the DFA part, a DFA consists of the corresponding set of states and an alphabet based on the set of states and Σ consisting of, first, a presentation of the DNA as a whole.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148



import java.util.*;
import java.util.stream.Collectors;

public class DFA {

    protected Set<State> states;
    protected Map<State, Map<Character, State>> transitions;
    protected Set<Character> alphabet;
    protected State startState;
    protected Set<State> finalStates;

    public DFA(Set<State> states, Map<State, Map<Character, State>> transitions, Set<Character> alphabet, State startState, Set<State> finalStates) {
        this.states = Objects.requireNonNull(states, "States must not be null.");
        this.transitions = Objects.requireNonNull(transitions, "Transitions must not be null.");
        this.alphabet = Objects.requireNonNull(alphabet, "Alphabet must not be null.");
        this.startState = Objects.requireNonNull(startState, "Start state must not be null.");
        this.finalStates = Objects.requireNonNull(finalStates, "Final states must not be null.");
        checkValidDFA();
    }

    public DFA(Set<State> states, Set<Transition> transitions, Set<Character> alphabet, State startState, Set<State> finalStates) {
        this(states, new HashMap<>(), alphabet, startState, finalStates);
        for (var trans1 : transitions) {
            for (var trans2 : transitions) {
                if (trans1 != trans2 &&
                        trans1.getStart().equals(trans2.getStart()) && trans1.getLabel() == trans2.getLabel()) {
                    throw new IllegalArgumentException("The set of transitions is not deterministic");
                }
            }
        }
        for (var s : states) {
            this.transitions.put(s, new HashMap<>());
        }
        for (var trans : transitions) {
            this.transitions.get(trans.getStart()).put(trans.getLabel(), trans.getEnd());
        }
    }

    /**
     * Returns all transitions starting in s
     * @param s The state where the transitions should start
     * @return All transitions starting in s
     */
    public Set<Transition> getTransitionsFromState(State s){
        return this.transitions.getOrDefault(s, new HashMap<>()).entrySet().stream()
                .map(t -> new Transition(s, t.getValue(), t.getKey()))
                .collect(Collectors.toSet());
    }

    /**
     * Returns the next state if it exists.
     * @param s The state where the transition should start
     * @param c The character to be read
     * @return An Option for the next state.
     */
    public Optional<State> getSuccessor(State s, Character c) {
        State nextState = this.transitions.getOrDefault(s, new HashMap<>()).getOrDefault(c, null);
        return Optional.ofNullable(nextState);
    }

    /**
     * Get the set of all transitions.
     * @return All transitions in the DFA
     */
    public Set<Transition> getTransitionSet() {
        return this.transitions.keySet().stream()
                .flatMap(s -> this.getTransitionsFromState(s).stream())
                .collect(Collectors.toSet());
    }

    /**
     * Checks whether all parameters given to the constructor are valid.
     * Throw an exception describing the problem if some parameter is invalid.
     */
    private void checkValidDFA() throws IllegalArgumentException{
        //All special states are in the state set
        if (!states.contains(startState)) {
            throw new IllegalArgumentException("EpsilonNFA constructor: startState must be element of states.");
        }
        if(!states.containsAll(finalStates)){
            throw new IllegalArgumentException("EpsilonNFA constructor: finalStates must be subset of states.");
        }

        //Transitions only use states from the state set and letters from the alphabet
        for (Transition t : this.getTransitionSet()){
            if (!states.contains(t.getStart())) {
                throw new IllegalArgumentException("EpsilonNFA constructor: Transition " + t + " starts in a state that is not an element of states");
            }
            if (!states.contains(t.getEnd())) {
                throw new IllegalArgumentException("EpsilonNFA constructor: Transition " + t + " ends in a state that is not an element of states");
            }
            if (!alphabet.contains(t.getLabel())){
                throw new IllegalArgumentException("EpsilonNFA constructor: Transition " + t + " uses a letter that is not an element of the alphabet");
            }
        }
    }

    public Set<State> getStates() {
        return states;
    }

    public Set<Character> getAlphabet() {
        return alphabet;
    }

    public State getStartState() {
        return startState;
    }

    public Set<State> getFinalStates() {
        return finalStates;
    }

    @Override
    public String toString(){
        return "EpsilonNFA\n" + toStringHelper();
    }
    public String toStringHelper(){
        String alph = "Alphabet: ";
        for (char c : alphabet){
            alph += c+";";
        }
        alph=alph.substring(0,alph.length()-1);

        String sta = "States: ";
        for (State s : states){
            sta += s+";";
        }
        sta=sta.substring(0,sta.length()-1);

        String init = "Init: " + startState;

        String fin = "Final: ";
        for (State s : finalStates){
            fin += s+";";
        }
        if (!finalStates.isEmpty()) fin=fin.substring(0,fin.length()-1);

        String trans = "Transitions:\n";
        for (Transition t : this.getTransitionSet()){
            trans += t.toString()+"\n";
        }
        return alph + "\n" + sta + "\n" + init + "\n" + fin + "\n" + trans + "END";
    }

}

The next point is about the implementation of the State, or state, class.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71


import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

public class State implements Comparable<State> {

    private static int id_counter = 0;
    private final int id;
    private final String name; //may be useful for thinking or debugging

    public State(){
        id = id_counter++;
        this.name = "";
    }

    public State(String name){
        id = id_counter++;
        this.name = name;
    }

    /**
     * Returns a set of states with size given by the parameter size.
     * The name of each state is the empty String.
     * @param size Gives the size of the set
     * @return A set of states with size given by the parameter size.
     */
    public static Set<State> getStateSet(int size){
        Set<State> result = new HashSet<State>();
        for (; size>0; size--){
            result.add(new State());
        }
        return result;
    }


    public String getName() {
        return name;
    }

    @Override
    public String toString(){
        if (name!=""){
            return name;
        }
        else{
            return "s"+id;
        }
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        State state = (State) o;
        return id == state.id;
    }

    @Override
    public int hashCode() {
        return Objects.hash(id);
    }

    public int compareTo(State other) {
    	return Integer.compare(this.id, other.id);
    }

    public int getId() {
        return id;
    }
}

The following is a specific mapping implementation of the transfer function.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48



import java.util.Objects;

public class Transition {

    private final State start;
    private final State end;
    private final char label;

    public Transition(State start, State end, char label) {
        this.start = start;
        this.end = end;
        this.label = label;
    }

    public State getStart() {
        return start;
    }

    public State getEnd() {
        return end;
    }

    public char getLabel() {
        return label;
    }

    @Override
    public String toString(){
        return start + ";" + label + ";" + end;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        Transition that = (Transition) o;
        return Objects.equals(start, that.start) &&
                Objects.equals(end, that.end) &&
                Objects.equals(label, that.label);
    }

    @Override
    public int hashCode() {
        return Objects.hash(start, end, label);
    }
}

Finally the basic part of copy finished, the next DFAOperation class is the focus, which is based on the existing DFA to identify whether a string is identified by it; and there is a given DFA to generate equivalent regular expressions (the algorithm is also Tobia Professor slides given above the class, in another blog post is also explained), I In order to be lazy, I basically use stream to implement it, so it is certainly not the best performance. The point to note here is that when converting the stream to an array of State classes, you need to sort them according to the corresponding ids, otherwise there will be a problem when using the algorithm given by the professor to summarize (it took several debug sessions to see this problem in a flash).

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90


import java.util.*;
import java.util.stream.Collectors;

public class DFAOperations {

	private final DFA dfa;

	public DFAOperations(DFA dfa) {
		this.dfa = dfa;
	}

	/** TODO
	 * Checks whether this DFA accepts a given word.
	 * @param word The word
	 * @return True if the DFA accepts the word.
	 */
	public boolean accepts(String word) {
		boolean accept = false;
		State tmp = dfa.startState;
		for (int i = 0; i < word.length(); i++){
			if(tmp.equals(null)){
				accept = false;
				break;
			}
			if(dfa.getAlphabet().contains(word.charAt(i))){
				if(dfa.getSuccessor(tmp, word.charAt(i)).isEmpty()){
					return false;
				}
				tmp =  dfa.getSuccessor(tmp, word.charAt(i)).get();

			}else {
				return false;
			}

		}
		if(dfa.finalStates.contains(tmp)){
			accept = true;
		}
		return accept;
	}

	/** TODO
	 * Converts a DFA to a regular expression that accepts the same language.
	 * The implementation should follow the proof of theorem 3.19.
	 *
	 * @return The regex that is equivalent to this automaton
	 */
	public Regex toRegex() {
		int n = dfa.states.size();
		int start = dfa.startState.getId();
		List<Regex> re = dfa.finalStates.stream().map(state -> RijGenerator(dfa, n, start + 1, state.getId() + 1)).collect(Collectors.toList());
        return Regex.alternatives(re.iterator());
	}

	public Regex RijGenerator(DFA dfa, int k, int i, int j){
		State[] middle = dfa.getStates().stream().sorted(State::compareTo).toArray(State[]::new);
		//middle = Arrays.stream(middle).sorted(State::compareTo).toArray(State[]::new);
		if(k == 0){
			Regex re;
			if(i != j){
				re = Regex.alternatives(dfa.getTransitionsFromState(middle[i - 1]).stream().filter(s -> s.getEnd().equals(middle[j - 1])).map(s -> Regex.single(s.getLabel())));
			}else {
				re = Regex.alternatives(dfa.getTransitionsFromState(middle[i - 1]).stream().filter(s -> s.getEnd().equals(middle[j - 1])).map(s -> Regex.single(s.getLabel()))).alternative(Regex.epsilon());
			}
			return re;
		}
		Regex result = Regex.alternatives(RijGenerator(dfa, k - 1, i, j),Regex.concats(RijGenerator(dfa, k - 1, i, k), Regex.star(RijGenerator(dfa, k - 1, k, k)), RijGenerator(dfa, k - 1, k, j)));
		return result;
	}


	public static void main(String[] args) {
		Scanner sc = new Scanner(System.in);
		DFAOperations dfa = new DFAOperations(Parser.parse(sc));
		String mode = sc.nextLine();
		if (mode.equals("Accepts")) {
			int numWords = Integer.parseInt(sc.nextLine());
			for (int i = 0; i < numWords; ++i) {
				System.out.println(dfa.accepts(sc.nextLine()));
			}
		} else if (mode.equals("ToRegex")) {
			System.out.println(dfa.toRegex());
		} else {
			sc.close();
			System.err.println("Invalid mode.");
			System.exit(-1);
		}
	}

}

The last and final is to determine the DFA parser class based on the input, and the input syntax has been given as follows.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129


import java.util.*;

public class Parser {
    /**
     * DFA
     * Alphabet: a;b;c
     * States: name;name2;name3
     * Init: name
     * Final: name;name2
     * Transitions:
     * q;a;p
     * q;;r
     * END
     */


    static DFA parse(Scanner scanner) {
        //First line
        String first = scanner.nextLine();
        if (!first.equals("NFA")&&!first.equals("DFA")&&!first.equals("EpsilonNFA")) {
            throw new IllegalArgumentException("Parsed automaton does not start with NFA, DFA or EpsilonNFA.");
        }

        //Second line; Alphabet
        String second = scanner.nextLine();
        if (!second.startsWith("Alphabet: ")) {
            throw new IllegalArgumentException("Parsed automaton does not declare alphabet first.");
        }
        Set<Character> alphabet = new HashSet<>();
        second = second.substring("Alphabet: ".length());
        for (String letter : second.split(";")) {
            if (letter.length() == 1) {
                alphabet.add(letter.charAt(0));
            } else {
                throw new IllegalArgumentException(
                        "Letters have to be input as a semicolon separated list without spaces. Letters may only be chars.");
            }
        }

        //Third line; States
        String third = scanner.nextLine();
        if (!third.startsWith("States: ")) {
            throw new IllegalArgumentException("Parsed automaton does not declare states second.\"");
        }
        Set<State> states = new HashSet<>();
        third = third.substring("States: ".length());
        for (String stateStr : third.split(";")) {
            states.add(new State(stateStr));
        }

        //Fourth line; initialstate
        String fourth = scanner.nextLine();
        if (!fourth.startsWith("Init: ")) {
            throw new IllegalArgumentException("Parsed automaton does not declare initial state third.");
        }
        fourth = fourth.substring("Init: ".length());

        //fifth line; final states
        String fifth = scanner.nextLine();
        if (!fifth.startsWith("Final: ")) {
            throw new IllegalArgumentException("Parsed automaton does not declare final states fourth.");
        }
        fifth=fifth.substring("Final: ".length());
        Set<State> finalStates = new HashSet<>();
        String[] finals=fifth.split(";");

        //Sixth line; transitions
        String sixth = scanner.nextLine();
        if (!sixth.equals("Transitions:")) {
            throw new IllegalArgumentException("Parsed automaton does not declare transitions fifth.");
        }

        Set<Transition> transitions = new HashSet<>();
        String transition;
        while (!(transition = scanner.nextLine()).equals("END")) {
            String[] split = transition.split(";");
            String left = split[0];
            String right = split[2];
            String letter = split[1];
            char label; //checking label in alphabet is done by checkValidEpsNFA
            if(letter.length()>1){
                throw new IllegalArgumentException("Transition label may only be a char, but it is " + letter);
            }
            label = letter.charAt(0);

            State l=null;
            State r=null;
            //the following loop brought to you by: Too lazy to change bad modelling decisions
            for (State s : states){
                if(s.getName().equals(left)){
                    l=s;
                }
                if(s.getName().equals(right)){
                    r=s;
                }
                if(r!=null && l!=null){
                    break;
                }
            }
            if(r==null && l==null){
                throw new IllegalArgumentException("The states for a transition are not in the state set ("+right+" or " + left +")");
            }

            transitions.add(new Transition(l, r, label));
        }

        State init = null;

        for (State s : states){
            if(s.getName().equals(fourth)){
                init=s;
            }
            for (String fStr:finals) {
                if (s.getName().equals(fStr)){
                    finalStates.add(s);
                }
            }
        }
        if(init==null){
            throw new IllegalArgumentException("Initial state is not in state set");
        }
        if(finals.length!=finalStates.size() && !finals[0].equals("")){
            throw new IllegalArgumentException("Not all final states are in the state set");
        }

        return new DFA(states, transitions, alphabet, init, finalStates);
    }

}