|
Pascal Strings
Last post 04-11-2008 7:58 PM by Lingerance. 45 replies.
-
03-07-2008 11:46 AM
|
|
-
JukeboxJim


- Joined on 08-15-2007
- Posts 18
|
Found in The Depths: Extracts from a "Pascal Strings" class inside a Java project... for when Java strings just aren't fast enough for you:
private final static char[ intDigits = {'0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' , 'a' , 'b' , 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' , 'j' , 'k' , 'l' , 'm' , 'n' , 'o' , 'p' , 'q' , 'r' , 's' , 't' , 'u' , 'v' , 'w' , 'x' , 'y' , 'z' };
final static char [ DigitTens = {'0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9'} ;
final static char [ DigitOnes = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'} ;
final static char[ MIN_INT_VALUE = { 11, '-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '8' };
final static char[ MAX_INT_VALUE = { 10, '2', '1', '4' ,'7', '4', '8', '3', '6', '4', '7' };
final static char[ MIN_LONG_VALUE = { 20, '-', '9','2','2','3','3','7','2','0','3','6','8','5','4','7','7','5','8','0','8' };
final static char[ MAX_LONG_VALUE = { 19, '9','2','2','3','3','7','2','0','3','6','8','5','4','7','7','5','8','0','7' };
public static boolean isPString(char[ pstring) { if (pstring == null) { return false; }
switch(pstring.length) { case 1: return pstring[PSTRING_SIZE_OFFSET] == 0; case 16 << 0: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 1: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 2: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 3: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 4: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 5: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 6: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 7: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 8: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 9: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 10: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 11: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; case 16 << 12: return pstring[PSTRING_SIZE_OFFSET] < pstring.length; default: return false;
And for converting:
public static final char[ INFINITY = {'I','n','f','i','n','i','t','y'}; public static final char[ NaN = {'N','a','N'}; public static final char[[ ZEROS = { {}, {'0'}, {'0','0'}, {'0','0','0'}, {'0','0','0','0'}, {'0','0','0','0','0'}, {'0','0','0','0','0','0'}, {'0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'}, {'0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0','0'}, };
|
|
-
-
JukeboxJim


- Joined on 08-15-2007
- Posts 18
|
Sorry, I almost missed:
private static final double[ d_tenthPowers = {
1e-323D, 1e-322D, 1e-321D, 1e-320D, 1e-319D, 1e-318D, 1e-317D, 1e-316D, 1e-315D, 1e-314D,
1e-313D, 1e-312D, 1e-311D, 1e-310D, 1e-309D, 1e-308D, 1e-307D, 1e-306D, 1e-305D, 1e-304D,
1e-303D, 1e-302D, 1e-301D, 1e-300D, 1e-299D, 1e-298D, 1e-297D, 1e-296D, 1e-295D, 1e-294D,
1e-293D, 1e-292D, 1e-291D, 1e-290D, 1e-289D, 1e-288D, 1e-287D, 1e-286D, 1e-285D, 1e-284D,
1e-283D, 1e-282D, 1e-281D, 1e-280D, 1e-279D, 1e-278D, 1e-277D, 1e-276D, 1e-275D, 1e-274D,
1e-273D, 1e-272D, 1e-271D, 1e-270D, 1e-269D, 1e-268D, 1e-267D, 1e-266D, 1e-265D, 1e-264D,
1e-263D, 1e-262D, 1e-261D, 1e-260D, 1e-259D, 1e-258D, 1e-257D, 1e-256D, 1e-255D, 1e-254D,
1e-253D, 1e-252D, 1e-251D, 1e-250D, 1e-249D, 1e-248D, 1e-247D, 1e-246D, 1e-245D, 1e-244D,
1e-243D, 1e-242D, 1e-241D, 1e-240D, 1e-239D, 1e-238D, 1e-237D, 1e-236D, 1e-235D, 1e-234D,
1e-233D, 1e-232D, 1e-231D, 1e-230D, 1e-229D, 1e-228D, 1e-227D, 1e-226D, 1e-225D, 1e-224D,
1e-223D, 1e-222D, 1e-221D, 1e-220D, 1e-219D, 1e-218D, 1e-217D, 1e-216D, 1e-215D, 1e-214D,
1e-213D, 1e-212D, 1e-211D, 1e-210D, 1e-209D, 1e-208D, 1e-207D, 1e-206D, 1e-205D, 1e-204D,
1e-203D, 1e-202D, 1e-201D, 1e-200D, 1e-199D, 1e-198D, 1e-197D, 1e-196D, 1e-195D, 1e-194D,
1e-193D, 1e-192D, 1e-191D, 1e-190D, 1e-189D, 1e-188D, 1e-187D, 1e-186D, 1e-185D, 1e-184D,
1e-183D, 1e-182D, 1e-181D, 1e-180D, 1e-179D, 1e-178D, 1e-177D, 1e-176D, 1e-175D, 1e-174D,
1e-173D, 1e-172D, 1e-171D, 1e-170D, 1e-169D, 1e-168D, 1e-167D, 1e-166D, 1e-165D, 1e-164D,
1e-163D, 1e-162D, 1e-161D, 1e-160D, 1e-159D, 1e-158D, 1e-157D, 1e-156D, 1e-155D, 1e-154D,
1e-153D, 1e-152D, 1e-151D, 1e-150D, 1e-149D, 1e-148D, 1e-147D, 1e-146D, 1e-145D, 1e-144D,
1e-143D, 1e-142D, 1e-141D, 1e-140D, 1e-139D, 1e-138D, 1e-137D, 1e-136D, 1e-135D, 1e-134D,
1e-133D, 1e-132D, 1e-131D, 1e-130D, 1e-129D, 1e-128D, 1e-127D, 1e-126D, 1e-125D, 1e-124D,
1e-123D, 1e-122D, 1e-121D, 1e-120D, 1e-119D, 1e-118D, 1e-117D, 1e-116D, 1e-115D, 1e-114D,
1e-113D, 1e-112D, 1e-111D, 1e-110D, 1e-109D, 1e-108D, 1e-107D, 1e-106D, 1e-105D, 1e-104D,
1e-103D, 1e-102D, 1e-101D, 1e-100D, 1e-99D, 1e-98D, 1e-97D, 1e-96D, 1e-95D, 1e-94D,
1e-93D, 1e-92D, 1e-91D, 1e-90D, 1e-89D, 1e-88D, 1e-87D, 1e-86D, 1e-85D, 1e-84D,
1e-83D, 1e-82D, 1e-81D, 1e-80D, 1e-79D, 1e-78D, 1e-77D, 1e-76D, 1e-75D, 1e-74D,
1e-73D, 1e-72D, 1e-71D, 1e-70D, 1e-69D, 1e-68D, 1e-67D, 1e-66D, 1e-65D, 1e-64D,
1e-63D, 1e-62D, 1e-61D, 1e-60D, 1e-59D, 1e-58D, 1e-57D, 1e-56D, 1e-55D, 1e-54D,
1e-53D, 1e-52D, 1e-51D, 1e-50D, 1e-49D, 1e-48D, 1e-47D, 1e-46D, 1e-45D, 1e-44D,
1e-43D, 1e-42D, 1e-41D, 1e-40D, 1e-39D, 1e-38D, 1e-37D, 1e-36D, 1e-35D, 1e-34D,
1e-33D, 1e-32D, 1e-31D, 1e-30D, 1e-29D, 1e-28D, 1e-27D, 1e-26D, 1e-25D, 1e-24D,
1e-23D, 1e-22D, 1e-21D, 1e-20D, 1e-19D, 1e-18D, 1e-17D, 1e-16D, 1e-15D, 1e-14D,
1e-13D, 1e-12D, 1e-11D, 1e-10D, 1e-9D, 1e-8D, 1e-7D, 1e-6D, 1e-5D, 1e-4D,
1e-3D, 1e-2D, 1e-1D, 1e0D, 1e1D, 1e2D, 1e3D, 1e4D,
1e5D, 1e6D, 1e7D, 1e8D, 1e9D, 1e10D, 1e11D, 1e12D, 1e13D, 1e14D,
1e15D, 1e16D, 1e17D, 1e18D, 1e19D, 1e20D, 1e21D, 1e22D, 1e23D, 1e24D,
1e25D, 1e26D, 1e27D, 1e28D, 1e29D, 1e30D, 1e31D, 1e32D, 1e33D, 1e34D,
1e35D, 1e36D, 1e37D, 1e38D, 1e39D, 1e40D, 1e41D, 1e42D, 1e43D, 1e44D,
1e45D, 1e46D, 1e47D, 1e48D, 1e49D, 1e50D, 1e51D, 1e52D, 1e53D, 1e54D,
1e55D, 1e56D, 1e57D, 1e58D, 1e59D, 1e60D, 1e61D, 1e62D, 1e63D, 1e64D,
1e65D, 1e66D, 1e67D, 1e68D, 1e69D, 1e70D, 1e71D, 1e72D, 1e73D, 1e74D,
1e75D, 1e76D, 1e77D, 1e78D, 1e79D, 1e80D, 1e81D, 1e82D, 1e83D, 1e84D,
1e85D, 1e86D, 1e87D, 1e88D, 1e89D, 1e90D, 1e91D, 1e92D, 1e93D, 1e94D,
1e95D, 1e96D, 1e97D, 1e98D, 1e99D, 1e100D, 1e101D, 1e102D, 1e103D, 1e104D,
1e105D, 1e106D, 1e107D, 1e108D, 1e109D, 1e110D, 1e111D, 1e112D, 1e113D, 1e114D,
1e115D, 1e116D, 1e117D, 1e118D, 1e119D, 1e120D, 1e121D, 1e122D, 1e123D, 1e124D,
1e125D, 1e126D, 1e127D, 1e128D, 1e129D, 1e130D, 1e131D, 1e132D, 1e133D, 1e134D,
1e135D, 1e136D, 1e137D, 1e138D, 1e139D, 1e140D, 1e141D, 1e142D, 1e143D, 1e144D,
1e145D, 1e146D, 1e147D, 1e148D, 1e149D, 1e150D, 1e151D, 1e152D, 1e153D, 1e154D,
1e155D, 1e156D, 1e157D, 1e158D, 1e159D, 1e160D, 1e161D, 1e162D, 1e163D, 1e164D,
1e165D, 1e166D, 1e167D, 1e168D, 1e169D, 1e170D, 1e171D, 1e172D, 1e173D, 1e174D,
1e175D, 1e176D, 1e177D, 1e178D, 1e179D, 1e180D, 1e181D, 1e182D, 1e183D, 1e184D,
1e185D, 1e186D, 1e187D, 1e188D, 1e189D, 1e190D, 1e191D, 1e192D, 1e193D, 1e194D,
1e195D, 1e196D, 1e197D, 1e198D, 1e199D, 1e200D, 1e201D, 1e202D, 1e203D, 1e204D,
1e205D, 1e206D, 1e207D, 1e208D, 1e209D, 1e210D, 1e211D, 1e212D, 1e213D, 1e214D,
1e215D, 1e216D, 1e217D, 1e218D, 1e219D, 1e220D, 1e221D, 1e222D, 1e223D, 1e224D,
1e225D, 1e226D, 1e227D, 1e228D, 1e229D, 1e230D, 1e231D, 1e232D, 1e233D, 1e234D,
1e235D, 1e236D, 1e237D, 1e238D, 1e239D, 1e240D, 1e241D, 1e242D, 1e243D, 1e244D,
1e245D, 1e246D, 1e247D, 1e248D, 1e249D, 1e250D, 1e251D, 1e252D, 1e253D, 1e254D,
1e255D, 1e256D, 1e257D, 1e258D, 1e259D, 1e260D, 1e261D, 1e262D, 1e263D, 1e264D,
1e265D, 1e266D, 1e267D, 1e268D, 1e269D, 1e270D, 1e271D, 1e272D, 1e273D, 1e274D,
1e275D, 1e276D, 1e277D, 1e278D, 1e279D, 1e280D, 1e281D, 1e282D, 1e283D, 1e284D,
1e285D, 1e286D, 1e287D, 1e288D, 1e289D, 1e290D, 1e291D, 1e292D, 1e293D, 1e294D,
1e295D, 1e296D, 1e297D, 1e298D, 1e299D, 1e300D, 1e301D, 1e302D, 1e303D, 1e304D,
1e305D, 1e306D, 1e307D, 1e308D
};
|
|
-
-
Lysis


- Joined on 09-13-2007
- Posts 236
|
What is this Pascal you speak of?
http://www.thebestpageintheuniverse.com
|
|
-
-
djork


- Joined on 09-28-2006
- Posts 665
|
Nothing has ever made me want to kill another human being before. On second though, whoever wrote that was not human. I want to get off the ride.
"Frames securely mediate, by design. Secure multi-mediation is the future of all webbing."
|
|
-
-
Zylon


- Joined on 12-14-2006
- Posts 179
|
Lysis: What is this Pascal you speak of?
It's a programming language used by people who aren't virginal, socially retarded dumbasses with no friends. So, understandably, you wouldn't be familiar with it.
|
|
-
-
AbbydonKrafts


- Joined on 11-21-2006
- Carrollton, GA, USA
- Posts 1,022
|
djork:Nothing has ever made me want to kill another human being before. On second though, whoever wrote that was not human.
I have to agree. That's just not human. Whoever wrote that had to be possessed.
Join us at #TDWTF on irc.slashnet.org !
|
|
-
-
Lysis


- Joined on 09-13-2007
- Posts 236
|
Zylon: Lysis: What is this Pascal you speak of?
It's a programming language used by people who aren't virginal, socially retarded dumbasses with no friends. So, understandably, you wouldn't be familiar with it. I prefer when you quote my flamebait.
http://www.thebestpageintheuniverse.com
|
|
-
-
Lysis


- Joined on 09-13-2007
- Posts 236
|
Zylon: Lysis: What is this Pascal you speak of?
It's a programming language used by people who aren't virginal, socially retarded dumbasses with no friends. So, understandably, you wouldn't be familiar with it. LMAO He edited to quote me. Ty kind sir!
http://www.thebestpageintheuniverse.com
|
|
-
-
mallard


- Joined on 12-22-2005
- Posts 181
|
For those who haven't used them before, Pascal strings differ from C strings in that instead of being null-terminated, the first byte is used to hold the length. Thus they are limited to 255 characters. Their main advantages were being able to find the length in O(1) time, being able to allocate a buffer that is the "right" size when only the first byte has been read (buffer overrun possible, but most Pascal implementations check array bounds by default) and always fitting into a 256-character buffer. Outside of the Pascal programming language, the only use that I am aware of is that the Mac OS Classic API expected most strings in this form, since Mac System 1 was written with Pascal being the targetted application language. (This was before C became popular on non-Unix systems).
|
|
-
-
morbiuswilters


- Joined on 01-15-2008
- East Coast Represent!
- Posts 4,990
|
AbbydonKrafts: djork:Nothing has ever made me want to kill another human being before. On second though, whoever wrote that was not human.
I have to agree. That's just not human. Whoever wrote that had to be possessed.
Only a sadistic bastard could create something like that. We need to remember this event, so that it never happens again. 
We will not be silenced.
|
|
-
-
Aaron


- Joined on 07-10-2007
- Posts 477
|
That's some front-page material right there. I think the worst part is that the Pascal String implementation in pascal is actually a lot simpler than this, if you ignore the screwy reference-counting and memory-management stuff. The perfect right triangle at the end is kind of cute, though.
|
|
-
-
vt_mruhlin


- Joined on 03-01-2007
- Austin, TX
- Posts 508
|
mallard:Thus they are limited to 255 characters. Their main advantages were being able to find the length in O(1) time
It's been a while since algorithms class in college, but if I'm not mistaken....If we know the length of the buffer, the maximum search time for that \0 is 256. I was always told that doing big-oh notation, constants don't generally matter. i.e. O(2n) is considered more or less equivalent to O(n), so O(256) might as well be O(1).
Course if all your code does is find th length of 1000 strings, you might notice that it takes 256 times longer in the extreme case, but generally speaking you're not going to notice a performance hit from that.
|
|
-
-
mallard


- Joined on 12-22-2005
- Posts 181
|
vt_mruhlin: mallard:Thus they are limited to 255 characters. Their main advantages were being able to find the length in O(1) time
It's been a while since algorithms class in college, but if I'm not mistaken....If we know the length of the buffer, the maximum search time for that \0 is 256. I was always told that doing big-oh notation, constants don't generally matter. i.e. O(2n) is considered more or less equivalent to O(n), so O(256) might as well be O(1).
Course if all your code does is find th length of 1000 strings, you might notice that it takes 256 times longer in the extreme case, but generally speaking you're not going to notice a performance hit from that. In my understanding O(1) means that the algorithm runs in constant time, meaning it takes the same amount of time every time (yes, O(256), O(99999), etc are all equivelent to O(1)). Since the algorithm for finding the length of a pascal string is simply "read the first byte", then it is contstant. The fact that pascal strings are limited to 256 characters is because their size must fit in a single byte, but you could have pascal-like strings using any amount of bytes to store the length and they would still have an O(1) time to find the length. This is in contrast to C strings, where the algorithm is "read every byte until you find '\0'" e.g. O(n). If you know the buffer size of the C string then it is still O(n), except that n is now the buffer size, not the string length.
|
|
-
-
arty


- Joined on 01-09-2007
- Posts 163
|
JukeboxJim: final static char[ MIN_INT_VALUE = { 11, '-', '2', '1', '4', '7', '4', '8', '3', '6', '4', '8' };
A guy who was raised on C, fought in the great C war, and is still on a pacific island, championing counted strings.
``Code and Own a piece of an app that is exploding'' -- Non-WTF(?) Jobs Post
|
|
-
-
spamcourt


- Joined on 01-28-2008
- Posts 37
|
mallard:first byte is used to hold the length. Thus they are limited to 255 characters.
But only in the old shortstring which noone uses anymore. The modern Pascal strings have 4 bytes of length information, allowing up to 4GB strings. And they are also null terminated, so they can be passed to C functions directly .
|
|
-
-
Schnolle


- Joined on 01-28-2008
- Posts 5
|
mallard: vt_mruhlin: mallard:Thus they are limited to 255 characters. Their main advantages were being able to find the length in O(1) time
It's been a while since algorithms class in college, but if I'm not mistaken....If we know the length of the buffer, the maximum search time for that \0 is 256. I was always told that doing big-oh notation, constants don't generally matter. i.e. O(2n) is considered more or less equivalent to O(n), so O(256) might as well be O(1).
Course if all your code does is find th length of 1000 strings, you might notice that it takes 256 times longer in the extreme case, but generally speaking you're not going to notice a performance hit from that. In my understanding O(1) means that the algorithm runs in constant time, meaning it takes the same amount of time every time (yes, O(256), O(99999), etc are all equivelent to O(1)). Since the algorithm for finding the length of a pascal string is simply "read the first byte", then it is contstant. The fact that pascal strings are limited to 256 characters is because their size must fit in a single byte, but you could have pascal-like strings using any amount of bytes to store the length and they would still have an O(1) time to find the length. This is in contrast to C strings, where the algorithm is "read every byte until you find '\0'" e.g. O(n). If you know the buffer size of the C string then it is still O(n), except that n is now the buffer size, not the string length. Sorry, both wrong. The O(f(n)) notation is only for the case where n can grow without bounds. Here, n <= 255, so it is meaningless to speak of what happens when n goes to infinity.
|
|
-
-
DZ-Jay


- Joined on 05-03-2005
- Posts 336
|
spamcourt: mallard:first byte is used to hold the length. Thus they are limited to 255 characters.
But only in the old shortstring which noone uses anymore. The modern Pascal strings have 4 bytes of length information, allowing up to 4GB strings. And they are also null terminated, so they can be passed to C functions directly . Those "old shortstrings" are in fact "Pascal Strings". The new C-like strings may be the way modern Pascal implements strings, but the name "Pascal String" is still used to refer to the old short strings of a maximum of 255 characters. Delphi documentation, for example, calls them "shortstrings", but it uses the term "Pascal String" interchangeably. -dZ.
Bastard Operators don't just win. Anyone can win. Bastard Operators win and totally demoralise. That's real winning.
- BOfH
|
|
-
-
-
Soviut


- Joined on 09-13-2007
- Posts 186
|
morbiuswilters: AbbydonKrafts: djork:Nothing has ever made me want to kill another human being before. On second though, whoever wrote that was not human.
I have to agree. That's just not human. Whoever wrote that had to be possessed.
Only a sadistic bastard could create something like that. We need to remember this event, so that it never happens again. 
We will not be silenced. Its like 9/11 times a million!...er...times 1e-122D!
|
|
-
-
bstorer


- Joined on 02-01-2007
- Alexandria, VA
- Posts 3,402
|
mallard:The fact that pascal strings are limited to 256 characters is because their size must fit in a single byte, but you could have pascal-like strings using any amount of bytes to store the length and they would still have an O(1) time to find the length.
No, the longer the string, the more bytes you need to store the size. So it clearly can't be constant time. It'd have an O(log n) time to find the length, where n is the length of the string.
|
|
-
-
Evo


- Joined on 10-16-2006
- Posts 46
|
Nice discussion here, about the complexity... Let me write an algorithm here... int len = -1; for(int i = 0; i < 256; i++) if(len == -1 && buf[i]) len = i; This is, I guess, an O(1) algorithm. Note that, of course, buf must have a constant length of 256. However, when the algorithm is: int len; for(int i = 0; i < 256; i++) if(buf[i]) { len = 1; break; } This is O(n). But it is never slower than the first (dumb) algorithm.
|
|
-
-
Lingerance


- Joined on 07-24-2007
- Canada
- Posts 1,172
|
Evo, both of those set len to 1 and end on the first iteration, therefore they are both O(1). A more correct example would be _something_ like:
//pascal string length:
// O(1);
len = buf[0];
//usage of len
//O(n); n == len
for (int i = 1; i < len; i++) buf[i];
//C string length/usage
//O(n)
len = 0;
while (*buf++) *buf, len++;
irc://irc.slashnet.org/#TDWTF (Redirects to #CodeLove ) Yo dawg I herd hoard you like to search so I put a 2TB txt file in yo SSDS so your memory's maxed out and your computer cant do shit? -- Nyquist
|
|
-
-
bstorer


- Joined on 02-01-2007
- Alexandria, VA
- Posts 3,402
|
Lingerance:Evo, both of those set len to 1 and end on the first iteration, therefore they are both O(1). A more correct example would be _something_ like:
//pascal string length:
// O(1);
len = buf[0];
//usage of len
//O(n); n == len
for (int i = 1; i < len; i++) buf[i];
//C string length/usage
//O(n)
len = 0;
while (*buf++) *buf, len++;
Exactly. Classic Pascal strings are O(1). C strings are O(n). And Pascal strings of arbitrary length are O(log n), because you need log_256 (n) bytes to store the size.
|
|
-
-
ActionMan


- Joined on 02-28-2007
- Posts 54
|
mallard:For those who haven't used them before, Pascal strings differ from C strings in that instead of being null-terminated, the first byte is used to hold the length. Thus they are limited to 255 characters. Their main advantages were being able to find the length in O(1) time, being able to allocate a buffer that is the "right" size when only the first byte has been read (buffer overrun possible, but most Pascal implementations check array bounds by default) and always fitting into a 256-character buffer. Outside of the Pascal programming language, the only use that I am aware of is that the Mac OS Classic API expected most strings in this form, since Mac System 1 was written with Pascal being the targetted application language. (This was before C became popular on non-Unix systems). C++ strings store the length at the front too, and are NULL terminated on-request for compatibility with C programmers.
|
|
-
-
Schnolle


- Joined on 01-28-2008
- Posts 5
|
teqman: Schnolle:Sorry, both wrong. The O(f(n)) notation is only for the case where n can grow without bounds. Here, n <= 255, so it is meaningless to speak of what happens when n goes to infinity.
Boy would I hate to be your algorithms professor. Nothing about the definition of O, theta, or omega requires N to be able to grow without bounds. Sorry you're right of course - n may have a maximum limit, in which case the expression is always O(1). Replace "meaningless" with "uninteresting" above...
|
|
-
-
WWWWolf


- Joined on 12-05-2005
- Oulu, Finland
- Posts 244
|
Soviut:Its like 9/11 times a million!...er...times 1e-122D!
Time to spread the {12,'L','o','o','s','e',' ','C','h','a','n','g','e'} DVDs around in the workplaces everywhere! Question your leaders and the official story from the java.lang.String Implementation Commission!
mysql> help contents; Nothing found Please try to run 'help contents' for a list of all accessible topics Desktop Search Rain - Gothic Computing's EASY button ( Go wild^H^H^H^H figure)
|
|
-
-
mallard


- Joined on 12-22-2005
- Posts 181
|
ActionMan:C++ strings store the length at the front too, and are NULL terminated on-request for compatibility with C programmers. Huh? I assume that by "C++ strings" you std::string? std::string is a class type and only has its interface is defined in the standard, so any particular implementation of std::string is free to store a string however the hell it wants, weather that means Pascal-style, null-terminated or using custom hardware that prints them out and OCRs them on demand.
|
|
-
-
belgariontheking


- Joined on 08-20-2007
- Cincinnati, OH, USA
- Posts 2,276
|
mallard:using custom hardware that prints them out and OCRs them on demand
Awesome. This will shift the paradigm. We must begin at once!
SpectateSwamp exposing aliens. Obviously the World needs SSDS
[10:07] <fatdog> so from now on.. be sure to wear nice clean underwear [10:07] <mps> fatdog: That is simply not going to happen
|
|
-
-
bstorer


- Joined on 02-01-2007
- Alexandria, VA
- Posts 3,402
|
mallard: ActionMan:C++ strings store the length at the front too, and are NULL terminated on-request for compatibility with C programmers. Huh? I assume that by "C++ strings" you std::string? std::string is a class type and only has its interface is defined in the standard, so any particular implementation of std::string is free to store a string however the hell it wants, weather that means Pascal-style, null-terminated or using custom hardware that prints them out and OCRs them on demand. I'm going to write one that, instead of storing the characters, generates a mathematical function that can be used to retrieve the characters by calculating f(x) where x = 1, 2, 3, ... You know you're at the end of the string when you hit a singularity.
|
|
-
-
WWWWolf


- Joined on 12-05-2005
- Oulu, Finland
- Posts 244
|
bstorer:I'm going to write one that, instead of storing the characters, generates a mathematical function that can be used to retrieve the characters by calculating f(x) where x = 1, 2, 3, ... You know you're at the end of the string when you hit a singularity.
Funny you should mention that, it reminds me of an old AWFUL idea of mine... (edit: this runs in GNU Octave)
# -*- octave -*-
# This is just about the most hideous and complicated way of printing
# "Hello World" I can think of for the moment...
# Feel free to abuse the idea.
# WWWWolf 2001-02-04
1;
letters = toascii("Hello world");
[p, evald] = polyfit([1:11],letters,13);
if (exist("want_plotted"))
plotx = (0:0.1:25)';
polydata = [plotx, polyval(p,plotx)];
chardata = [(1:11)',letters'];
gset grid xtics ytics;
gplot [0:13] [0:255] \
chardata with points title "desired values", \
polydata with lines title "fitted curve"
endif
disp(setstr(evald))
mysql> help contents; Nothing found Please try to run 'help contents' for a list of all accessible topics Desktop Search Rain - Gothic Computing's EASY button ( Go wild^H^H^H^H figure)
|
|
-
-
WWWWolf


- Joined on 12-05-2005
- Oulu, Finland
- Posts 244
|
Oooooh yeah. Okay, everyone, all together now - can you figure out why this only produces three letters and complete garbage after that? Yes, you can! It's pretty obvious, now is it?
C *** MAIN PROGRAM ***
C TRY TO FIGURE THIS OUT, BET YOU THINK IT'S SILLY...
PROGRAM HELLOWORLD
INTEGER I
CHARACTER HELLO(11)
DO 10 I = 1, 11
HELLO(I) = CHAR(INT(POLYEVAL(I)))
10 CONTINUE
WRITE(*,*) HELLO
STOP
END
C *** POLYNOMIAL EVALUATION FUNCTION ***
DOUBLE PRECISION FUNCTION POLYEVAL(X)
INTEGER X, J, K
DOUBLE PRECISION R
DOUBLE PRECISION COEFF(14)
C COEFFICIENT TABLE. SHOULD BE A DATA BLOCK OR CONSTANT OR WHATEVER?
COEFF( 1) = 1.99079381095492E-05
COEFF( 2) = -0.00115437249297999
COEFF( 3) = 0.0287925989153794
COEFF( 4) = -0.403199605046038
COEFF( 5) = 3.46465373237423
COEFF( 6) = -18.6812013367602
COEFF( 7) = 61.4275957129813
COEFF( 8) = -109.846789475099
COEFF( 9) = 64.1870784258645
COEFF(10) = 69.9070799001331
COEFF(11) = -43.5100385152995
COEFF(12) = -77.6672165963919
COEFF(13) = -1.37969637341796
COEFF(14) = 124.474075996299
C EVALUATE POLYNOMIAL.
C RESULT STORED TO R. J AND K USED SOLELY AS COUNTERS.
R = 0.0
J = 1
K = 13
DO WHILE (J .LE. 13)
R = R + COEFF(J) * X ** K
J = J + 1
K = K - 1
ENDDO
R = R + COEFF(14)
C RETURN THE VALUE.
POLYEVAL = R
RETURN
END
mysql> help contents; Nothing found Please try to run 'help contents' for a list of all accessible topics Desktop Search Rain - Gothic Computing's EASY button ( Go wild^H^H^H^H figure)
|
|
-
-
DaveK


- Joined on 02-22-2006
- Posts 1,162
|
bstorer: mallard: ActionMan:C++ strings store the length at the front too, and are NULL terminated on-request for compatibility with C programmers. Huh? I assume that by "C++ strings" you std::string? std::string
is a class type and only has its interface is defined in the standard,
so any particular implementation of std::string is free to store a
string however the hell it wants, weather that means Pascal-style,
null-terminated or using custom hardware that prints them out and OCRs
them on demand. I'm going to write one that,
instead of storing the characters, generates a mathematical function
that can be used to retrieve the characters by calculating f(x) where x
= 1, 2, 3, ... You know you're at the end of the string when you
hit a singularity. Ah,
that's an age-old tradition on the compression mailing lists /
newsgroup, that one. Every few months someone turns up who's just
invented the best compression algoroithm ever. "Ah, if I just
write a PRNG with a careful choice of the correct generator and seed
parameters I can generate any sequence of output data I need! All
I need to do is specify a couple of numbers and I can encode ANY file
at all, even RANDOM data, in almost no space at all!" Generally
the time that they arrive on the list is when they've reached the point
where they've got a working compressor, and it truely can compress any
file of any size down to a few dozen bytes, and they've just got one or
two teeny-weeny-eentsy little bugs in the decompressor to fix, but as
soon as they've fixed them they'll release their code so that everyone
can acclaim them for the genii they are. It's quite fun watching
them try to figure out why their decompressor /always/ seems to have
"just one or two" last bugs, no matter how many "last" bugs they go on
to fix... I propose that we rename this technique "Brillant compression" in future.
(USER WAS BANNED FOR THIS POST)
|
|
-
-
Aaron


- Joined on 07-10-2007
- Posts 477
|
bstorer:Exactly. Classic Pascal strings are O(1). C strings are O(n). And Pascal strings of arbitrary length are O(log n), because you need log_256 (n) bytes to store the size. Data structures aren't O(anything). You have to specify the operation you're trying to perform. If your operation is to determine the length, then yes, Pascal Strings are O(1) and C strings are O(n). If you're constructing, copying, concatenating, transforming, searching for a substring, or really doing anything non-trivial, then it's going to be an O(n) operation for both data types. Finding the length of a string is actually a pretty common operation, so it's worth optimizing (as it has been in every string type invented after the C string), but if that's what you were trying to say, then it wasn't very clear.
|
|
-
-
-
Welbog


- Joined on 02-08-2007
- Posts 586
|
Aaron:Without major changes to the datatype, how could you possibly know where the length ends and string begins?
You store, in unary, the number of bytes taken up by the size of the string.
|
|
-
-
Pidgeot


- Joined on 09-19-2007
- Posts 72
|
Aaron:
Without major changes to the datatype, how could you possibly know where the length ends and string begins?
It wouldn't necessarily take that big a change. MIDI uses a concept where the length specification is variable-length - only the lower 7 bits of the byte are actually part of the length, while the upper bit indicates if there are more length bytes.
Granted, MIDI does limit you to 4 length bytes, but the structure allows you to theoretically continue forever.
|
|
-
-
Aaron


- Joined on 07-10-2007
- Posts 477
|
Welbog:You store, in unary, the number of bytes taken up by the size of the string. Using a single 64-bit Integer for the length could represent strings up to 16 million terabytes long. Are there any practical applications where increasing the complexity and access times would actually be beneficial? Even if people actually did this, you could no longer call it a Pascal String. Hence the "without major changes" qualifier.
|
|
-
-
bstorer


- Joined on 02-01-2007
- Alexandria, VA
- Posts 3,402
|
Aaron: bstorer:Exactly. Classic Pascal strings are O(1). C strings are O(n). And Pascal strings of arbitrary length are O(log n), because you need log_256 (n) bytes to store the size. Data structures aren't O(anything). You have to specify the operation you're trying to perform.
I know this. The discussion was clearly about determining the length of an arbitrary-length Pascal string. I also hope you realize that your notion of an arbitrary-length Pascal
string is fundamentally impossible. Without major changes to the
datatype, how could you possibly know where the length ends and string
begins? This was left as an excercise to the reader, because it's of no importance when discussing the performace of the algorithm. A simple enough solution is to reserve the high bit of each character and require it to be zero on every but the last byte of size data. That way you just read bytes until data[n] & 0x80 == 1. Feel free to improve it.
|
|
-
-
Spectre


- Joined on 05-09-2007
- ::1
- Posts 771
|
mallard:std::string is a class type and only has its interface is defined in the
standard, so any particular implementation of std::string is free to store a
string however the hell it wants, weather that means Pascal-style,
null-terminated or using custom hardware that prints them out and OCRs them on
demand.
I guess I'm just being anal, but string::size() must run in constant time, so a null-terminated approach is impractical (besides, C++ strings can contain NULs). However, since they have the c_str() method, it is reasonable to append a redundant NUL to simplify implementation.
╩юфют√ь ёЄЁрэшЎрь яюЁр эр яхэёш■.
#TDWTF @ SlashNET was merged into #codelove @ the same network. You're still welcome to drop by. I guess.
|
|
-
-
bstorer


- Joined on 02-01-2007
- Alexandria, VA
- Posts 3,402
|
Aaron: Welbog:You store, in unary, the number of bytes taken up by the size of the string. Using a single 64-bit Integer for the length could represent strings up to 16 million terabytes long. Are there any practical applications where increasing the complexity and access times would actually be beneficial? Even if people actually did this, you could no longer call it a Pascal String. Hence the "without major changes" qualifier. Christ, is there some pedantry college that you people graduate from, or is it more like a trade union?
|
|
-
-
Aaron


- Joined on 07-10-2007
- Posts 477
|
bstorer:Christ, is there some pedantry college that you people graduate from, or is it more like a trade union? This isn't just pedantic - I believe it started because of a misunderstanding up higher: bstorer: mallard:The fact that
pascal strings are limited to 256 characters is because their size must
fit in a single byte, but you could have pascal-like strings using any
amount of bytes to store the length and they would still have an O(1)
time to find the length.
No, the longer the string, the
more bytes you need to store the size. So it clearly can't be constant
time. It'd have an O(log n) time to find the length, where n is the
length of the string. It's evident to me that what he was trying to say was that you could use any fixed amount of bytes to store the length, and still have O(1) performance. In other words, use 4 bytes to store the size instead of 1 byte (for example). I was pointing out the impossibility (I guess I've been corrected, although I still wouldn't call those implementations "pascal strings") to highlight the fact that finding the length of a Pascal string is always O(1) - it's fundamental to the design.
I guess I should have quoted the whole context, but it's kind of hard when it's scattered across 3 or 4 different posts. Sue me. Even if I'm wrong, and he was really talking about some as-yet-unimplemented variable-length Pascal string implementation, don't you think that you yourself were being more than a little pedantic to start up with the O(log n) business? And as for whether it's a college or trade union, it's neither - it's a hobby.
|
|
-
-
CodeSimian


- Joined on 02-08-2008
- Posts 762
|
Evo:
Nice discussion here, about the complexity... Let me write an algorithm here... int len = -1; for(int i = 0; i < 256; i++) if(len == -1 && buf[i]) len = i; This is, I guess, an O(1) algorithm. Note that, of course, buf must have a constant length of 256. However, when the algorithm is: int len; for(int i = 0; i < 256; i++) if(buf[i]) { len = 1; break; } This is O(n). But it is never slower than the first (dumb) algorithm.
Lingerance:Evo, both of those set len to 1 and end on the first iteration, therefore they are both O(1). Actually, the first algorithm does not set len to 1. It sets len to 0 (unless buf consists of 256 '\0' chars, in which case len will remain -1) and it always iterates 256 times. The second algorithm does set len to 0 (unless buf[ again consists of 256 '\0' chars.) I agree that neither algorithm is correct, and thus a discussion of their time complexity is moot. (Especially since neither algorithm does proper bounds checking on the input, considering that O() is a function of the input size. If buf is meant to contain a null-terminated C "string", we all know that you are not supposed to read past the first '\0'.)
|
|
-
-
marv


- Joined on 04-11-2008
- Posts 1
|
Aaron:Even if I'm wrong, and he was really talking about some as-yet-unimplemented variable-length Pascal string implementation, don't you think that you yourself were being more than a little pedantic to start up with the O(log n) business? Don't try to stop a theoretician. :) Also his assumption is not perfectly correct. There are two possible szenarios: First is a practical system where we need a data type that has exactly the size of the maximum string length. Then there is no reason, why such a strlen-method could be Member of O(1) - we can read the size at once. The second szenario would be a theoretical system, which has no memory limits. This system would also have no limits for the size of its data types, so every finite implementation would again be member of O(1). marvin
|
|
-
-
MasterPlanSoftware


- Joined on 11-10-2006
- Posts 108
|
marv:marvin Lingerance! We could use your signature over here!
|
|
-
-
morbiuswilters


- Joined on 01-15-2008
- East Coast Represent!
- Posts 4,990
|
MasterPlanSoftware: marv:marvin Lingerance! We could use your signature over here!
When I saw the front page today I was like "Oh shit, another Side Bar!"
|
|
-
Page 1 of 1 (46 items)
|
|
|