|  |     Let's assume (for convenience) that you are going to represent the
    number in binary.  You have a function which allows you to build the
    "index" by emitting its bits in order (either from lowest to highest or
    vice versa, it doesn't matter) as you discover them.  You are going to
    build the index by looking at each element of the permutation 1 through
    m, in turn.  Let k1 (=k) be the number of different objects which
    appear at or above location 1 in the permuation, k2 (= either k or k-1)
    be the number of different objects which appear at or above location
    2 in the permuation, etc. up to km (=1) which appaer at or above
    the last locatio of the permuation.
    Go through the elements of the permutation from first to last.  At
    location i, there are ki possible elements.  Assume for the moment
    that all the ki are powers of 2.  Then you could specify the correct
    choice at each position by emiting the appropriate number of bits.
    (Keeping track of which number represents which element can be handled
    in a pretty straight forward way with some form of rebalancing binary
    tree which can be accessed at step i at average cost of log(ki)).
    What if a ki is not a power of 2?  (In fact unless all of the ni-s are
    2 or less it is almost guarenteed that one of the kis will not be a
    power of 2).  This is precisely what the technique called arithmetic
    coding is for (arithmetic coding is usually used for compression, and
    if you think about it, this is simply a problem in maximal compression
    of a permuated sequence).
    Arithmetic coding is complicated enough so that I won't describe it
    here -- but there was a nice article about it in CACM not too many
    years ago.  I can probably find the reference if you really want it.
					Topher
 | 
|  | C     This program is a quick example to show the idea of maximally
C     compressed coding of permutations of object types.  This coding
C     preserves lexicographical ordering although it can be made more
C     efficient if this property is discarded.  See comments within code
C     for how to do this.
C
C     The input is very specialized and there is no error checking.
C
      INTEGER IPERM1(99),IPERM2(99),N(9)
      INTEGER ENPERM
C
C     M is the number of distinct types of objects
C     N[I] is the number of objects of the Ith type, I=1..M
C     IPERM1 is the user input permutation
C     IPERM2 is the decoded permutation
C
      M=3
      N(1)=4
      N(2)=4
      N(3)=1
      NPOS=0
      DO 10 I=1,M
10      NPOS=NPOS+N(I)
      TYPE 20
20    FORMAT(' Input permutation (4 ones, 4 twos, and 1 three): ',$)
      ACCEPT 30,(IPERM1(I),I=1,NPOS)
30    FORMAT(80I1)
      KODE=ENPERM(N,M,IPERM1)
      CALL DEPERM(IPERM2,N,M,KODE)
      TYPE 40,KODE
40    FORMAT(' Code:    ',I9)
      TYPE 50,(IPERM1(I),I=1,NPOS)
50    FORMAT(' Input:   ',70I1)
      TYPE 60,(IPERM2(I),I=1,NPOS)
60    FORMAT(' Decoded: ',70I1)
      CALL EXIT
      END
      SUBROUTINE DEPERM(IARRAY,N,M,KODE)
      INTEGER IARRAY(1),N(1)
      INTEGER IMAP(99),LEFT(99)
C
C     IARRAY   is the decoded permutation
C     N[I]     is the number of objects of the Ith type, I=1..M
C     M        is the number of distinct types of objects
C     KODE     is the permutation code
C     IMAP(I)  is the Ith type of object.  This changes as objects are
C              used up
C     LEFT(I)  is the number of unused objects of type I
C     NPOS     is the number of positions in the permutation
C     NTOT     is the total number of permutations
C     ITOP     is the number of object types that aren't used up
C
      NPOS=0
      NTOT=1
      NPOS=0
      DO 20 I=1,M
        DO 10 J=1,N(I)
          NPOS=NPOS+1
10        NTOT=(NTOT*NPOS)/J
        IMAP(I)=I
20      LEFT(I)=N(I)
      ITOP=M
      NUM=KODE
      KKK=NTOT
      DO 50 IPOS=NPOS,1,-1
        IBOT=0
        I=0
30        I=I+1
          LLL=(KKK*LEFT(I))/IPOS
          IBOT=IBOT+LLL
          IF (NUM.GE.IBOT) GOTO 30
        KKK=LLL
        NUM=NUM-IBOT+KKK
        IARRAY(NPOS-IPOS+1)=IMAP(I)
        LEFT(I)=LEFT(I)-1
        IF (LEFT(I).EQ.0) THEN
C
C    The following loop can be replaced with this code if the code isn't
C    required to preserve lexicographical order.  But make sure that both
C    ENPERM and DEPERM are consistent.
C
C         LEFT(I)=LEFT(ITOP)
C         IMAP(I)=IMAP(ITOP)
C
          DO 40 J=I,ITOP-1
            LEFT(J)=LEFT(J+1)
40          IMAP(J)=IMAP(J+1)
C
          ITOP=ITOP-1
        END IF
50    CONTINUE
      RETURN
      END
      INTEGER FUNCTION ENPERM(N,M,IARRAY)
      INTEGER N(1),IARRAY(1)
      INTEGER MAP(9),LEFT(9)
C
C     ENPERM   is the permutation code (function return value)
C     N[I]     is the number of objects of the Ith type, I=1..M
C     M        is the number of distinct types of objects
C     IARRAY   is the permutation to encode
C     IMAP(I)  is the Ith type of object.  This changes as objects are
C              used up
C     LEFT(I)  is the number of unused objects of type I
C     NPOS     is the number of positions in the permutation
C     NTOT     is the total number of permutations
C     ITOP     is the number of object types that aren't used up
C
      NPOS=0
      NTOT=1
      NPOS=0
      DO 20 I=1,M
        DO 10 J=1,N(I)
          NPOS=NPOS+1
10        NTOT=(NTOT*NPOS)/J
        MAP(I)=I
20      LEFT(I)=N(I)
      ITOP=M
      ENPERM=0
      KKK=NTOT
      DO 50 IPOS=NPOS,2,-1
        J=IARRAY(NPOS-IPOS+1)
        I=0
30        I=I+1
          LLL=(KKK*LEFT(I))/IPOS
          ENPERM=ENPERM+LLL
          IF (MAP(I).NE.J) GOTO 30
        ENPERM=ENPERM-LLL
        KKK=LLL
        LEFT(I)=LEFT(I)-1
        IF (LEFT(I).EQ.0) THEN
C
C    The following loop can be replaced with this code if the code isn't
C    required to preserve lexicographical order.  But make sure that both
C    ENPERM and DEPERM are consistent.
C
C         LEFT(I)=LEFT(ITOP)
C         IMAP(I)=IMAP(ITOP)
C
          DO 40 J=I,ITOP-1
            LEFT(J)=LEFT(J+1)
40          MAP(J)=MAP(J+1)
C
          ITOP=ITOP-1
        END IF
50    CONTINUE
      RETURN
      END
 |