1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <Python.h> 27 28 #include <string.h> 29 30 static PyObject *MalformedActionError; 31 static PyObject *InvalidActionError; 32 static PyObject *UnknownActionError; 33 static PyObject *aclass_attribute; 34 static PyObject *aclass_depend; 35 static PyObject *aclass_directory; 36 static PyObject *aclass_driver; 37 static PyObject *aclass_file; 38 static PyObject *aclass_group; 39 static PyObject *aclass_hardlink; 40 static PyObject *aclass_legacy; 41 static PyObject *aclass_license; 42 static PyObject *aclass_link; 43 static PyObject *aclass_signature; 44 static PyObject *aclass_unknown; 45 static PyObject *aclass_user; 46 47 static const char *notident = "hash attribute not identical to positional hash"; 48 49 static inline int 50 add_to_attrs(PyObject *attrs, PyObject *key, PyObject *attr) 51 { 52 int ret; 53 PyObject *list; 54 PyObject *av = PyDict_GetItem(attrs, key); 55 56 if (av == NULL) 57 return (PyDict_SetItem(attrs, key, attr)); 58 59 if (PyList_CheckExact(av)) 60 return (PyList_Append(av, attr)); 61 62 if ((list = PyList_New(2)) == NULL) 63 return (-1); 64 65 /* PyList_SET_ITEM steals references. */ 66 Py_INCREF(av); 67 PyList_SET_ITEM(list, 0, av); 68 Py_INCREF(attr); 69 PyList_SET_ITEM(list, 1, attr); 70 ret = PyDict_SetItem(attrs, key, list); 71 Py_DECREF(list); 72 return (ret); 73 } 74 75 static void 76 set_malformederr(const char *str, int pos, const char *msg) 77 { 78 PyObject *val; 79 80 if ((val = Py_BuildValue("sis", str, pos, msg)) != NULL) { 81 PyErr_SetObject(MalformedActionError, val); 82 Py_DECREF(val); 83 } 84 } 85 86 static void 87 set_invaliderr(const char *str, const char *msg) 88 { 89 PyObject *val; 90 91 if ((val = Py_BuildValue("ss", str, msg)) != NULL) { 92 PyErr_SetObject(InvalidActionError, val); 93 Py_DECREF(val); 94 } 95 } 96 97 /*ARGSUSED*/ 98 static PyObject * 99 fromstr(PyObject *self, PyObject *args, PyObject *kwdict) 100 { 101 char *s = NULL; 102 char *str = NULL; 103 char *hashstr = NULL; 104 char *keystr = NULL; 105 int *slashmap = NULL; 106 int strl, typestrl; 107 int i, ks, vs, keysize; 108 int smlen, smpos; 109 char quote; 110 PyObject *act_args = NULL; 111 PyObject *act_class = NULL; 112 PyObject *act_data = NULL; 113 PyObject *action = NULL; 114 PyObject *hash = NULL; 115 PyObject *attrs = NULL; 116 PyObject *key = NULL; 117 PyObject *attr = NULL; 118 enum { 119 KEY, /* key */ 120 UQVAL, /* unquoted value */ 121 QVAL, /* quoted value */ 122 WS /* whitespace */ 123 } state; 124 125 /* 126 * If malformed() or invalid() are used, CLEANUP_REFS can only be used 127 * after. Likewise, PyMem_Free(str) should not be called before using 128 * malformed() or invalid(). Failure to order this properly will cause 129 * corruption of the exception messages. 130 */ 131 #define malformed(msg) set_malformederr(str, i, (msg)) 132 #define invalid(msg) set_invaliderr(str, (msg)) 133 #define CLEANUP_REFS \ 134 PyMem_Free(str);\ 135 Py_XDECREF(key);\ 136 Py_XDECREF(attr);\ 137 Py_XDECREF(attrs);\ 138 Py_XDECREF(hash);\ 139 free(hashstr); 140 141 /* 142 * Positional arguments must be included in the keyword argument list in 143 * the order you want them to be assigned. (A subtle point missing from 144 * the Python documentation.) 145 */ 146 static char *kwlist[] = { "string", "data", NULL }; 147 148 /* Assume data=None by default. */ 149 act_data = Py_None; 150 151 /* 152 * The action string is currently assumed to be a stream of bytes that 153 * are valid UTF-8. This method works regardless of whether the string 154 * object provided is a Unicode object, string object, or a character 155 * buffer. 156 */ 157 if (PyArg_ParseTupleAndKeywords(args, kwdict, "et#|O:fromstr", kwlist, 158 "utf-8", &str, &strl, &act_data) == 0) { 159 return (NULL); 160 } 161 162 s = strpbrk(str, " \t"); 163 164 i = strl; 165 if (s == NULL) { 166 malformed("no attributes"); 167 PyMem_Free(str); 168 return (NULL); 169 } 170 171 /* 172 * The comparisons here are ordered by frequency in which actions are 173 * most likely to be encountered in usage by the client grouped by 174 * length. Yes, a cheap hack to squeeze a tiny bit of additional 175 * performance out. 176 */ 177 typestrl = s - str; 178 if (typestrl == 4) { 179 if (strncmp(str, "file", 4) == 0) 180 act_class = aclass_file; 181 else if (strncmp(str, "link", 4) == 0) 182 act_class = aclass_link; 183 else if (strncmp(str, "user", 4) == 0) 184 act_class = aclass_user; 185 } else if (typestrl == 6) { 186 if (strncmp(str, "depend", 6) == 0) 187 act_class = aclass_depend; 188 else if (strncmp(str, "driver", 6) == 0) 189 act_class = aclass_driver; 190 else if (strncmp(str, "legacy", 6) == 0) 191 act_class = aclass_legacy; 192 } else if (typestrl == 3) { 193 if (strncmp(str, "set", 3) == 0) 194 act_class = aclass_attribute; 195 else if (strncmp(str, "dir", 3) == 0) 196 act_class = aclass_directory; 197 } else if (typestrl == 8) { 198 if (strncmp(str, "hardlink", 8) == 0) 199 act_class = aclass_hardlink; 200 } else if (typestrl == 7) { 201 if (strncmp(str, "license", 7) == 0) 202 act_class = aclass_license; 203 else if (strncmp(str, "unknown", 7) == 0) 204 act_class = aclass_unknown; 205 } else if (typestrl == 9) { 206 if (strncmp(str, "signature", 9) == 0) 207 act_class = aclass_signature; 208 } else if (typestrl == 5) { 209 if (strncmp(str, "group", 5) == 0) 210 act_class = aclass_group; 211 } 212 213 if (act_class == NULL) { 214 if ((act_args = Py_BuildValue("s#s#", str, strl, 215 str, typestrl)) != NULL) { 216 PyErr_SetObject(UnknownActionError, act_args); 217 Py_DECREF(act_args); 218 PyMem_Free(str); 219 return (NULL); 220 } 221 222 /* 223 * Unable to build argument list for exception; so raise 224 * general type exception instead. 225 */ 226 PyErr_SetString(PyExc_TypeError, "unknown action type"); 227 PyMem_Free(str); 228 return (NULL); 229 } 230 231 ks = vs = typestrl; 232 state = WS; 233 if ((attrs = PyDict_New()) == NULL) { 234 PyMem_Free(str); 235 return (NULL); 236 } 237 for (i = s - str; str[i]; i++) { 238 if (state == KEY) { 239 keysize = i - ks; 240 keystr = &str[ks]; 241 242 if (str[i] == ' ' || str[i] == '\t') { 243 if (PyDict_Size(attrs) > 0 || hash != NULL) { 244 malformed("whitespace in key"); 245 CLEANUP_REFS; 246 return (NULL); 247 } else { 248 if ((hash = PyString_FromStringAndSize( 249 keystr, keysize)) == NULL) { 250 CLEANUP_REFS; 251 return (NULL); 252 } 253 hashstr = strndup(keystr, keysize); 254 state = WS; 255 } 256 } else if (str[i] == '=') { 257 if ((key = PyString_FromStringAndSize( 258 keystr, keysize)) == NULL) { 259 CLEANUP_REFS; 260 return (NULL); 261 } 262 263 if (keysize == 4 && strncmp(keystr, "data", 264 keysize) == 0) { 265 invalid("invalid key: 'data'"); 266 CLEANUP_REFS; 267 return (NULL); 268 } 269 270 /* 271 * Pool attribute key to reduce memory usage and 272 * potentially improve lookup performance. 273 */ 274 PyString_InternInPlace(&key); 275 276 if (i == ks) { 277 malformed("impossible: missing key"); 278 CLEANUP_REFS; 279 return (NULL); 280 } else if (++i == strl) { 281 malformed("missing value"); 282 CLEANUP_REFS; 283 return (NULL); 284 } 285 if (str[i] == '\'' || str[i] == '\"') { 286 state = QVAL; 287 quote = str[i]; 288 vs = i + 1; 289 } else if (str[i] == ' ' || str[i] == '\t') { 290 malformed("missing value"); 291 CLEANUP_REFS; 292 return (NULL); 293 } else { 294 state = UQVAL; 295 vs = i; 296 } 297 } else if (str[i] == '\'' || str[i] == '\"') { 298 malformed("quote in key"); 299 CLEANUP_REFS; 300 return (NULL); 301 } 302 } else if (state == QVAL) { 303 if (str[i] == '\\') { 304 if (i == strl - 1) 305 break; 306 /* 307 * "slashmap" is a list of the positions of the 308 * backslashes that need to be removed from the 309 * final attribute string. 310 */ 311 if (slashmap == NULL) { 312 smlen = 16; 313 slashmap = calloc(smlen, sizeof (int)); 314 if (slashmap == NULL) { 315 PyMem_Free(str); 316 return (PyErr_NoMemory()); 317 } 318 smpos = 0; 319 /* 320 * Terminate slashmap with an invalid 321 * value so we don't think there's a 322 * slash right at the beginning. 323 */ 324 slashmap[smpos] = -1; 325 } else if (smpos == smlen - 1) { 326 smlen *= 2; 327 slashmap = realloc(slashmap, 328 smlen * sizeof (int)); 329 if (slashmap == NULL) { 330 PyMem_Free(str); 331 return (PyErr_NoMemory()); 332 } 333 } 334 i++; 335 if (str[i] == '\\' || str[i] == quote) { 336 slashmap[smpos++] = i - 1 - vs; 337 /* 338 * Keep slashmap properly terminated so 339 * that a realloc()ed array doesn't give 340 * us random slash positions. 341 */ 342 slashmap[smpos] = -1; 343 } 344 } else if (str[i] == quote) { 345 state = WS; 346 if (slashmap != NULL) { 347 char *sattr; 348 int j, o, attrlen; 349 350 attrlen = i - vs; 351 sattr = calloc(1, attrlen + 1); 352 if (sattr == NULL) { 353 PyMem_Free(str); 354 free(slashmap); 355 return (PyErr_NoMemory()); 356 } 357 /* 358 * Copy the attribute from str into 359 * sattr, removing backslashes as 360 * slashmap indicates we should. 361 */ 362 for (j = 0, o = 0; j < attrlen; j++) { 363 if (slashmap[o] == j) { 364 o++; 365 continue; 366 } 367 sattr[j - o] = str[vs + j]; 368 } 369 370 free(slashmap); 371 slashmap = NULL; 372 373 if ((attr = PyString_FromStringAndSize( 374 sattr, attrlen - o)) == NULL) { 375 free(sattr); 376 CLEANUP_REFS; 377 return (NULL); 378 } 379 free(sattr); 380 } else { 381 Py_XDECREF(attr); 382 if ((attr = PyString_FromStringAndSize( 383 &str[vs], i - vs)) == NULL) { 384 CLEANUP_REFS; 385 return (NULL); 386 } 387 } 388 389 if (strncmp(keystr, "hash=", 5) == 0) { 390 char *as = PyString_AsString(attr); 391 if (hashstr && strcmp(as, hashstr)) { 392 invalid(notident); 393 CLEANUP_REFS; 394 return (NULL); 395 } 396 hash = attr; 397 attr = NULL; 398 } else { 399 PyString_InternInPlace(&attr); 400 if (add_to_attrs(attrs, key, 401 attr) == -1) { 402 CLEANUP_REFS; 403 return (NULL); 404 } 405 } 406 } 407 } else if (state == UQVAL) { 408 if (str[i] == ' ' || str[i] == '\t') { 409 state = WS; 410 Py_XDECREF(attr); 411 attr = PyString_FromStringAndSize(&str[vs], 412 i - vs); 413 if (strncmp(keystr, "hash=", 5) == 0) { 414 char *as = PyString_AsString(attr); 415 if (hashstr && strcmp(as, hashstr)) { 416 invalid(notident); 417 CLEANUP_REFS; 418 return (NULL); 419 } 420 hash = attr; 421 attr = NULL; 422 } else { 423 PyString_InternInPlace(&attr); 424 if (add_to_attrs(attrs, key, 425 attr) == -1) { 426 CLEANUP_REFS; 427 return (NULL); 428 } 429 } 430 } 431 } else if (state == WS) { 432 if (str[i] != ' ' && str[i] != '\t') { 433 state = KEY; 434 ks = i; 435 if (str[i] == '=') { 436 malformed("missing key"); 437 CLEANUP_REFS; 438 return (NULL); 439 } 440 } 441 } 442 } 443 444 /* 445 * UQVAL is the most frequently encountered end-state, so check that 446 * first to avoid unnecessary state comparisons. 447 */ 448 if (state == UQVAL) { 449 Py_XDECREF(attr); 450 attr = PyString_FromStringAndSize(&str[vs], i - vs); 451 if (strncmp(keystr, "hash=", 5) == 0) { 452 char *as = PyString_AsString(attr); 453 if (hashstr && strcmp(as, hashstr)) { 454 invalid(notident); 455 CLEANUP_REFS; 456 return (NULL); 457 } 458 hash = attr; 459 attr = NULL; 460 } else { 461 PyString_InternInPlace(&attr); 462 if (add_to_attrs(attrs, key, attr) == -1) { 463 CLEANUP_REFS; 464 return (NULL); 465 } 466 } 467 } else if (state == QVAL) { 468 if (slashmap != NULL) 469 free(slashmap); 470 471 malformed("unfinished quoted value"); 472 CLEANUP_REFS; 473 return (NULL); 474 } else if (state == KEY) { 475 malformed("missing value"); 476 CLEANUP_REFS; 477 return (NULL); 478 } 479 480 PyMem_Free(str); 481 Py_XDECREF(key); 482 Py_XDECREF(attr); 483 484 /* 485 * Action parsing is done; now build the list of arguments to construct 486 * the object for it. 487 */ 488 if ((act_args = Py_BuildValue("(O)", act_data)) == NULL) { 489 if (hash != NULL && hash != Py_None) 490 Py_DECREF(hash); 491 Py_DECREF(attrs); 492 return (NULL); 493 } 494 495 /* 496 * Using the cached action class assigned earlier based on the type, 497 * call the action constructor, set the hash attribute, and then return 498 * the new action object. 499 */ 500 action = PyObject_Call(act_class, act_args, attrs); 501 Py_DECREF(act_args); 502 Py_DECREF(attrs); 503 if (action == NULL) { 504 if (hash != NULL && hash != Py_None) 505 Py_DECREF(hash); 506 return (NULL); 507 } 508 509 if (hash != NULL && hash != Py_None) { 510 if (PyObject_SetAttrString(action, "hash", hash) == -1) { 511 Py_DECREF(hash); 512 Py_DECREF(action); 513 return (NULL); 514 } 515 Py_DECREF(hash); 516 } 517 518 return (action); 519 } 520 521 static PyMethodDef methods[] = { 522 { "fromstr", (PyCFunction)fromstr, METH_VARARGS | METH_KEYWORDS }, 523 { NULL, NULL, 0, NULL } 524 }; 525 526 PyMODINIT_FUNC 527 init_actions(void) 528 { 529 PyObject *action_types = NULL; 530 PyObject *pkg_actions = NULL; 531 PyObject *sys = NULL; 532 PyObject *sys_modules = NULL; 533 534 /* 535 * Note that module initialization functions are void and may not return 536 * a value. However, they should set an exception if appropriate. 537 */ 538 if (Py_InitModule("_actions", methods) == NULL) 539 return; 540 541 /* 542 * We need to retrieve the MalformedActionError object from pkg.actions. 543 * We can't import pkg.actions directly, because that would result in a 544 * circular dependency. But the "sys" module has a dict called 545 * "modules" which maps loaded module names to the corresponding module 546 * objects. We can then grab the exception from those objects. 547 */ 548 549 if ((sys = PyImport_ImportModule("sys")) == NULL) 550 return; 551 552 if ((sys_modules = PyObject_GetAttrString(sys, "modules")) == NULL) 553 return; 554 555 if ((pkg_actions = PyDict_GetItemString(sys_modules, "pkg.actions")) 556 == NULL) { 557 /* No exception is set */ 558 PyErr_SetString(PyExc_KeyError, "pkg.actions"); 559 Py_DECREF(sys_modules); 560 return; 561 } 562 Py_DECREF(sys_modules); 563 564 /* 565 * Each reference is DECREF'd after retrieval as Python 2.x doesn't 566 * provide a module shutdown/cleanup hook. Since these references are 567 * guaranteed to stay around until the module is unloaded, DECREF'ing 568 * them now ensures that garbage cleanup will work as expected during 569 * process exit. This applies to the action type caching below as well. 570 */ 571 MalformedActionError = \ 572 PyObject_GetAttrString(pkg_actions, "MalformedActionError"); 573 Py_DECREF(MalformedActionError); 574 InvalidActionError = \ 575 PyObject_GetAttrString(pkg_actions, "InvalidActionError"); 576 Py_DECREF(InvalidActionError); 577 UnknownActionError = \ 578 PyObject_GetAttrString(pkg_actions, "UnknownActionError"); 579 Py_DECREF(UnknownActionError); 580 581 /* 582 * Retrieve the list of action types and then store a reference to each 583 * class for use during action construction. (This allows avoiding the 584 * overhead of retrieving a new reference for each action constructed.) 585 */ 586 if ((action_types = PyObject_GetAttrString(pkg_actions, 587 "types")) == NULL) { 588 PyErr_SetString(PyExc_KeyError, "pkg.actions.types missing!"); 589 return; 590 } 591 592 /* 593 * cache_class borrows the references to the action type objects; this 594 * is safe as they should remain valid as long as the module is loaded. 595 * (PyDict_GetItem* doesn't return a new reference.) 596 */ 597 #define cache_class(cache_var, name) \ 598 if ((cache_var = PyDict_GetItemString(action_types, name)) == NULL) { \ 599 PyErr_SetString(PyExc_KeyError, \ 600 "Action type class missing: " name); \ 601 Py_DECREF(action_types); \ 602 return; \ 603 } 604 605 cache_class(aclass_attribute, "set"); 606 cache_class(aclass_depend, "depend"); 607 cache_class(aclass_directory, "dir"); 608 cache_class(aclass_driver, "driver"); 609 cache_class(aclass_file, "file"); 610 cache_class(aclass_group, "group"); 611 cache_class(aclass_hardlink, "hardlink"); 612 cache_class(aclass_legacy, "legacy"); 613 cache_class(aclass_license, "license"); 614 cache_class(aclass_link, "link"); 615 cache_class(aclass_signature, "signature"); 616 cache_class(aclass_unknown, "unknown"); 617 cache_class(aclass_user, "user"); 618 619 Py_DECREF(action_types); 620 } 621
