Skip to content

Commit dbb7ab3

Browse files
committed
String theory: extend API with support for code points.
1 parent f841c4c commit dbb7ab3

File tree

10 files changed

+212
-9
lines changed

10 files changed

+212
-9
lines changed

src/org/sosy_lab/java_smt/api/StringFormulaManager.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,8 @@ public interface StringFormulaManager {
7171
IntegerFormula indexOf(StringFormula str, StringFormula part, IntegerFormula startIndex);
7272

7373
/**
74-
* Get a substring of length 1 from the given String.
75-
*
76-
* <p>The result is underspecified, if the index is out of bounds for the given String.
74+
* Get a substring of length 1 from the given String if the given index is within bounds.
75+
* Otherwise, returns an empty string.
7776
*/
7877
StringFormula charAt(StringFormula str, IntegerFormula index);
7978

@@ -231,4 +230,17 @@ default RegexFormula concat(RegexFormula... parts) {
231230
* It returns the empty string <code>""</code> for negative numbers.
232231
*/
233232
StringFormula toStringFormula(IntegerFormula number);
233+
234+
/**
235+
* Returns an Integer formula representing the code point of the only character of the given
236+
* String formula, if s is a singleton string. Otherwise, returns -1.
237+
*/
238+
IntegerFormula toCodePoint(StringFormula str);
239+
240+
/**
241+
* Returns the singleton string whose only character is the given code point if it is represented
242+
* as a single char in UTF16, i.e., it is in the range [0, 196607]. Otherwise, returns the empty
243+
* string.
244+
*/
245+
StringFormula fromCodePoint(IntegerFormula codePoint);
234246
}

src/org/sosy_lab/java_smt/basicimpl/AbstractStringFormulaManager.java

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ protected RegexFormula wrapRegex(TFormulaInfo formulaInfo) {
4040
return getFormulaCreator().encapsulateRegex(formulaInfo);
4141
}
4242

43+
protected IntegerFormula wrapInteger(TFormulaInfo formulaInfo) {
44+
return getFormulaCreator().encapsulate(FormulaType.IntegerType, formulaInfo);
45+
}
46+
4347
@Override
4448
public StringFormula makeString(String value) {
4549
return wrapString(makeStringImpl(value));
@@ -141,10 +145,7 @@ public BooleanFormula contains(StringFormula str, StringFormula part) {
141145

142146
@Override
143147
public IntegerFormula indexOf(StringFormula str, StringFormula part, IntegerFormula startIndex) {
144-
return getFormulaCreator()
145-
.encapsulate(
146-
FormulaType.IntegerType,
147-
indexOf(extractInfo(str), extractInfo(part), extractInfo(startIndex)));
148+
return wrapInteger(indexOf(extractInfo(str), extractInfo(part), extractInfo(startIndex)));
148149
}
149150

150151
protected abstract TFormulaInfo indexOf(
@@ -287,8 +288,7 @@ public RegexFormula times(RegexFormula regex, int repetitions) {
287288

288289
@Override
289290
public IntegerFormula toIntegerFormula(StringFormula str) {
290-
return getFormulaCreator()
291-
.encapsulate(FormulaType.IntegerType, toIntegerFormula(extractInfo(str)));
291+
return wrapInteger(toIntegerFormula(extractInfo(str)));
292292
}
293293

294294
protected abstract TFormulaInfo toIntegerFormula(TFormulaInfo pParam);
@@ -299,4 +299,18 @@ public StringFormula toStringFormula(IntegerFormula number) {
299299
}
300300

301301
protected abstract TFormulaInfo toStringFormula(TFormulaInfo pParam);
302+
303+
@Override
304+
public IntegerFormula toCodePoint(StringFormula number) {
305+
return wrapInteger(toCodePoint(extractInfo(number)));
306+
}
307+
308+
protected abstract TFormulaInfo toCodePoint(TFormulaInfo pParam);
309+
310+
@Override
311+
public StringFormula fromCodePoint(IntegerFormula number) {
312+
return wrapString(fromCodePoint(extractInfo(number)));
313+
}
314+
315+
protected abstract TFormulaInfo fromCodePoint(TFormulaInfo pParam);
302316
}

src/org/sosy_lab/java_smt/delegate/debugging/DebuggingStringFormulaManager.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,4 +354,22 @@ public StringFormula toStringFormula(IntegerFormula number) {
354354
debugging.addFormulaTerm(result);
355355
return result;
356356
}
357+
358+
@Override
359+
public IntegerFormula toCodePoint(StringFormula str) {
360+
debugging.assertThreadLocal();
361+
debugging.assertFormulaInContext(str);
362+
IntegerFormula result = delegate.toCodePoint(str);
363+
debugging.addFormulaTerm(result);
364+
return result;
365+
}
366+
367+
@Override
368+
public StringFormula fromCodePoint(IntegerFormula codepoint) {
369+
debugging.assertThreadLocal();
370+
debugging.assertFormulaInContext(codepoint);
371+
StringFormula result = delegate.fromCodePoint(codepoint);
372+
debugging.addFormulaTerm(result);
373+
return result;
374+
}
357375
}

src/org/sosy_lab/java_smt/delegate/statistics/StatisticsStringFormulaManager.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,4 +233,16 @@ public StringFormula toStringFormula(IntegerFormula number) {
233233
stats.stringOperations.getAndIncrement();
234234
return delegate.toStringFormula(number);
235235
}
236+
237+
@Override
238+
public IntegerFormula toCodePoint(StringFormula str) {
239+
stats.stringOperations.getAndIncrement();
240+
return delegate.toCodePoint(str);
241+
}
242+
243+
@Override
244+
public StringFormula fromCodePoint(IntegerFormula codepoint) {
245+
stats.stringOperations.getAndIncrement();
246+
return delegate.fromCodePoint(codepoint);
247+
}
236248
}

src/org/sosy_lab/java_smt/delegate/synchronize/SynchronizedStringFormulaManager.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,4 +268,18 @@ public StringFormula toStringFormula(IntegerFormula number) {
268268
return delegate.toStringFormula(number);
269269
}
270270
}
271+
272+
@Override
273+
public IntegerFormula toCodePoint(StringFormula str) {
274+
synchronized (sync) {
275+
return delegate.toCodePoint(str);
276+
}
277+
}
278+
279+
@Override
280+
public StringFormula fromCodePoint(IntegerFormula codepoint) {
281+
synchronized (sync) {
282+
return delegate.fromCodePoint(codepoint);
283+
}
284+
}
271285
}

src/org/sosy_lab/java_smt/solvers/cvc4/CVC4StringFormulaManager.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,14 @@ protected Expr toIntegerFormula(Expr pParam) {
190190
protected Expr toStringFormula(Expr pParam) {
191191
return exprManager.mkExpr(Kind.STRING_ITOS, pParam);
192192
}
193+
194+
@Override
195+
protected Expr toCodePoint(Expr pParam) {
196+
return exprManager.mkExpr(Kind.STRING_TO_CODE, pParam);
197+
}
198+
199+
@Override
200+
protected Expr fromCodePoint(Expr pParam) {
201+
return exprManager.mkExpr(Kind.STRING_FROM_CODE, pParam);
202+
}
193203
}

src/org/sosy_lab/java_smt/solvers/cvc5/CVC5StringFormulaManager.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,4 +186,14 @@ protected Term toStringFormula(Term pParam) {
186186
"CVC5 only supports INT to STRING conversion.");
187187
return solver.mkTerm(Kind.STRING_FROM_INT, pParam);
188188
}
189+
190+
@Override
191+
protected Term toCodePoint(Term pParam) {
192+
return solver.mkTerm(Kind.STRING_TO_CODE, pParam);
193+
}
194+
195+
@Override
196+
protected Term fromCodePoint(Term pParam) {
197+
return solver.mkTerm(Kind.STRING_FROM_CODE, pParam);
198+
}
189199
}

src/org/sosy_lab/java_smt/solvers/princess/PrincessStringFormulaManager.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,4 +309,14 @@ protected ITerm toIntegerFormula(IExpression pParam) {
309309
protected ITerm toStringFormula(IExpression pParam) {
310310
return new IFunApp(PrincessEnvironment.stringTheory.int_to_str(), toITermSeq(pParam));
311311
}
312+
313+
@Override
314+
protected IExpression toCodePoint(IExpression pParam) {
315+
return new IFunApp(PrincessEnvironment.stringTheory.str_to_code(), toITermSeq(pParam));
316+
}
317+
318+
@Override
319+
protected IExpression fromCodePoint(IExpression pParam) {
320+
return new IFunApp(PrincessEnvironment.stringTheory.str_from_code(), toITermSeq(pParam));
321+
}
312322
}

src/org/sosy_lab/java_smt/solvers/z3/Z3StringFormulaManager.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,4 +178,14 @@ protected Long toIntegerFormula(Long pParam) {
178178
protected Long toStringFormula(Long pParam) {
179179
return Native.mkIntToStr(z3context, pParam);
180180
}
181+
182+
@Override
183+
protected Long toCodePoint(Long pParam) {
184+
return Native.mkStringToCode(z3context, pParam);
185+
}
186+
187+
@Override
188+
protected Long fromCodePoint(Long pParam) {
189+
return Native.mkStringFromCode(z3context, pParam);
190+
}
181191
}

src/org/sosy_lab/java_smt/test/StringFormulaManagerTest.java

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ public class StringFormulaManagerTest extends SolverBasedTest0.ParameterizedSolv
6464
"abchurrdurr",
6565
"abcdefaaaaa");
6666

67+
private static final int MAX_SINGLE_CODE_POINT_IN_UTF16 = 0x2FFFF;
68+
6769
private StringFormula a;
6870
private StringFormula b;
6971
private StringFormula ab;
@@ -736,6 +738,97 @@ public void testCharAtWithConstString() throws SolverException, InterruptedExcep
736738
assertDistinct(smgr.charAt(ab, imgr.makeNumber(1)), a);
737739
}
738740

741+
@Test
742+
public void testCharAtHasAlwaysLengthZeroOrOne() throws SolverException, InterruptedException {
743+
StringFormula someString = smgr.makeVariable("someString");
744+
IntegerFormula position = imgr.makeVariable("position");
745+
IntegerFormula length = smgr.length(smgr.charAt(someString, position));
746+
747+
BooleanFormula lengthZero = imgr.equal(length, imgr.makeNumber(0));
748+
BooleanFormula lengthOne = imgr.equal(length, imgr.makeNumber(1));
749+
750+
assertThatFormula(bmgr.or(lengthZero, lengthOne)).isTautological();
751+
}
752+
753+
@Test
754+
public void testStringToCodePoint() throws SolverException, InterruptedException {
755+
// TODO report to developers
756+
assume()
757+
.withMessage("Solver %s crashes", solverToUse())
758+
.that(solverToUse())
759+
.isNotEqualTo(Solvers.PRINCESS);
760+
761+
assertThatFormula(imgr.equal(smgr.toCodePoint(a), imgr.makeNumber('a'))).isTautological();
762+
assertThatFormula(imgr.equal(smgr.toCodePoint(b), imgr.makeNumber('b'))).isTautological();
763+
764+
// string of length != 1 are invalid and return -1
765+
assertThatFormula(imgr.equal(smgr.toCodePoint(ab), imgr.makeNumber(-1))).isTautological();
766+
assertThatFormula(imgr.equal(smgr.toCodePoint(empty), imgr.makeNumber(-1))).isTautological();
767+
}
768+
769+
@Test
770+
public void testToCodePointInRange() throws SolverException, InterruptedException {
771+
// TODO report to developers
772+
assume()
773+
.withMessage("Solver %s crashes", solverToUse())
774+
.that(solverToUse())
775+
.isNotEqualTo(Solvers.PRINCESS);
776+
777+
StringFormula str = smgr.makeVariable("str");
778+
IntegerFormula cp = smgr.toCodePoint(str);
779+
BooleanFormula invalidStr = imgr.equal(cp, imgr.makeNumber(-1));
780+
BooleanFormula cpInRange =
781+
bmgr.and(
782+
imgr.lessOrEquals(imgr.makeNumber(0), cp),
783+
imgr.lessOrEquals(cp, imgr.makeNumber(MAX_SINGLE_CODE_POINT_IN_UTF16)));
784+
assertThatFormula(bmgr.or(invalidStr, cpInRange)).isTautological();
785+
}
786+
787+
@Test
788+
public void testFromCodePointInRange() throws SolverException, InterruptedException {
789+
// TODO report to developers
790+
assume()
791+
.withMessage("Solver %s crashes", solverToUse())
792+
.that(solverToUse())
793+
.isNotEqualTo(Solvers.PRINCESS);
794+
795+
IntegerFormula cp = imgr.makeVariable("cp");
796+
StringFormula str = smgr.fromCodePoint(cp);
797+
IntegerFormula len = smgr.length(str);
798+
799+
// all normal code points are in range, i.e., string length is 1.
800+
BooleanFormula cpInRange =
801+
bmgr.and(
802+
imgr.lessOrEquals(imgr.makeNumber(0), cp),
803+
imgr.lessOrEquals(cp, imgr.makeNumber(MAX_SINGLE_CODE_POINT_IN_UTF16)));
804+
assertThatFormula(cpInRange).isEquivalentTo(imgr.equal(len, imgr.makeNumber(1)));
805+
806+
// all other code points are out of range, i.e., they match the empty string with length 0.
807+
assertThatFormula(bmgr.not(cpInRange)).isEquivalentTo(smgr.equal(str, empty));
808+
}
809+
810+
@Test
811+
public void testStringFromCodePoint() throws SolverException, InterruptedException {
812+
StringFormula cpA = smgr.fromCodePoint(imgr.makeNumber('a'));
813+
StringFormula cpB = smgr.fromCodePoint(imgr.makeNumber('b'));
814+
assertThatFormula(smgr.equal(cpA, a)).isTautological();
815+
assertThatFormula(smgr.equal(cpB, b)).isTautological();
816+
assertThatFormula(smgr.equal(cpA, smgr.makeString(Character.toString(97)))).isTautological();
817+
818+
StringFormula cpOne = smgr.fromCodePoint(imgr.makeNumber(1));
819+
StringFormula cpTen = smgr.fromCodePoint(imgr.makeNumber(10));
820+
assertThatFormula(smgr.equal(cpOne, smgr.makeString(Character.toString(1)))).isTautological();
821+
assertThatFormula(smgr.equal(cpTen, smgr.makeString(Character.toString(10)))).isTautological();
822+
823+
// negative numbers are invalid and return empty string
824+
StringFormula cpNegOne = smgr.fromCodePoint(imgr.makeNumber(-1));
825+
StringFormula cpNegTen = smgr.fromCodePoint(imgr.makeNumber(-10));
826+
StringFormula cpNeg256 = smgr.fromCodePoint(imgr.makeNumber(-100));
827+
assertThatFormula(smgr.equal(cpNegOne, empty)).isTautological();
828+
assertThatFormula(smgr.equal(cpNegTen, empty)).isTautological();
829+
assertThatFormula(smgr.equal(cpNeg256, empty)).isTautological();
830+
}
831+
739832
/**
740833
* Test escapecharacter treatment. Escape characters are treated as a single char! Example:
741834
* "a\u1234T" has "a" at position 0, "\u1234" at position 1 and "T" at position 2

0 commit comments

Comments
 (0)