Initial commit

Zhongwei Li
2025-11-29 18:15:04 +08:00
commit ec0d1b5905
19 changed files with 5696 additions and 0 deletions

15
.claude-plugin/plugin.json Normal file

@@ -0,0 +1,15 @@
{
"name": "programming-skills",
"description": "Collection of skills used for various programming tasks",
"version": "0.0.0-2025.11.28",
"author": {
"name": "Falko Noé",
"email": "falkonoe@gmail.com"
},
"skills": [
"./skills/shell-scripting",
"./skills/r-development",
"./skills/python-style-guide",
"./skills/pixi"
]
}

3
README.md Normal file

@@ -0,0 +1,3 @@
# programming-skills
Collection of skills used for various programming tasks

104
plugin.lock.json Normal file

@@ -0,0 +1,104 @@
{
"$schema": "internal://schemas/plugin.lock.v1.json",
"pluginId": "gh:CodingKaiser/claude-kaiser-skills:programming-skills",
"normalized": {
"repo": null,
"ref": "refs/tags/v20251128.0",
"commit": "eb44df3f7d48b76ef0143ee8f93d0d637a510756",
"treeHash": "a891b505518265eba1bfb02b421971f74b39e284194ab1b43dfdd4f965ee91a3",
"generatedAt": "2025-11-28T10:10:03.663248Z",
"toolVersion": "publish_plugins.py@0.2.0"
},
"origin": {
"remote": "git@github.com:zhongweili/42plugin-data.git",
"branch": "master",
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
},
"manifest": {
"name": "programming-skills",
"description": "Collection of skills used for various programming tasks"
},
"content": {
"files": [
{
"path": "README.md",
"sha256": "e557525503f5cff708690de58a73d8405e2919a16a7d0dd909449017ea950c4f"
},
{
"path": ".claude-plugin/plugin.json",
"sha256": "358f3defc2ff66da95972f2bfbead49442899a9c6e2281b4408e3ac46b5b00e8"
},
{
"path": "skills/pixi/LICENSE",
"sha256": "936fd8bfe2a7b0bb13f939135ceb73ece2ec0fdea04fcb571d7c3d8be885a29c"
},
{
"path": "skills/pixi/SKILL.md",
"sha256": "bfaa74c06182d089be380de6c61449b8cda50d14fc075cc92c3421253e975caf"
},
{
"path": "skills/shell-scripting/README.md",
"sha256": "4d4d2e417c3738fd5ff0b0eac0024f7f4d2ffb735f8e4ef23eac103d3cb676e8"
},
{
"path": "skills/shell-scripting/SKILL.md",
"sha256": "440600e15f5e4868366a27043feb712d0a3deaf52a7752d9ca8e9296e0829f2a"
},
{
"path": "skills/shell-scripting/references/template.sh",
"sha256": "e365addd9cd11482d5ba23b2ff242cab442b1dd1326f2a35a9952f85d296cedf"
},
{
"path": "skills/shell-scripting/references/patterns.md",
"sha256": "f801cdf109642ea1c09e6d411e16c6d6ce27ebc8be095cca6240021bd03694b8"
},
{
"path": "skills/python-style-guide/LICENSE",
"sha256": "7e7170e3cebf88a9f60c7b8421418323c09304da1af4d5e90f4da1dc1c8a2661"
},
{
"path": "skills/python-style-guide/SKILL.md",
"sha256": "f12bade5ad7ad86b2170d1e0e8384d124805307f678088574c04142f37bdcbf8"
},
{
"path": "skills/python-style-guide/references/docstring_examples.md",
"sha256": "5f1b41345546235f1f13525689a91cc0696d15d62d2aa478d2585f6d6e24c2d2"
},
{
"path": "skills/python-style-guide/references/advanced_types.md",
"sha256": "5dffa3bec8a81804c04fbadc035cd1ae4490cdac588c5e2b521e2c832e94e0ac"
},
{
"path": "skills/python-style-guide/references/antipatterns.md",
"sha256": "89c834f62b88bd8f31548b666f944ecfdb58ae8af32a0513f7cac6a9888a5388"
},
{
"path": "skills/r-development/SKILL.md",
"sha256": "c0afa20c816cb54aab14cb7256f5bbf435b795976b327e614c6be0bc6d119333"
},
{
"path": "skills/r-development/references/rlang-patterns.md",
"sha256": "e1f94f60d6312de8b0cdcf16e0f0d3a5096f246f8358bf7594e733ba886c757b"
},
{
"path": "skills/r-development/references/object-systems.md",
"sha256": "d00e48f01108a21806db1ef0f6d5606e5c183f8a0d3b7be8fba90f5bbf34e69b"
},
{
"path": "skills/r-development/references/performance.md",
"sha256": "9d7558bd6b62d65e353da06394c317db7f08d639e0567281562a3c4d1a4fe4d2"
},
{
"path": "skills/r-development/references/package-development.md",
"sha256": "c4ab3453e58ab5623f98aeeb3a572c05102afe4dd0ba5d7a0ba73712f57c326f"
}
],
"dirSha256": "a891b505518265eba1bfb02b421971f74b39e284194ab1b43dfdd4f965ee91a3"
},
"security": {
"scannedAt": null,
"scannerVersion": null,
"flags": []
}
}

28
skills/pixi/LICENSE Normal file

@@ -0,0 +1,28 @@
BSD 3-Clause License
Copyright (c) 2025, UW Scientific Software Engineering Center
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

1286
skills/pixi/SKILL.md Normal file

File diff suppressed because it is too large

395
skills/python-style-guide/LICENSE Normal file

@@ -0,0 +1,395 @@
Attribution 4.0 International
=======================================================================
Creative Commons Corporation ("Creative Commons") is not a law firm and
does not provide legal services or legal advice. Distribution of
Creative Commons public licenses does not create a lawyer-client or
other relationship. Creative Commons makes its licenses and related
information available on an "as-is" basis. Creative Commons gives no
warranties regarding its licenses, any material licensed under their
terms and conditions, or any related information. Creative Commons
disclaims all liability for damages resulting from their use to the
fullest extent possible.
Using Creative Commons Public Licenses
Creative Commons public licenses provide a standard set of terms and
conditions that creators and other rights holders may use to share
original works of authorship and other material subject to copyright
and certain other rights specified in the public license below. The
following considerations are for informational purposes only, are not
exhaustive, and do not form part of our licenses.
Considerations for licensors: Our public licenses are
intended for use by those authorized to give the public
permission to use material in ways otherwise restricted by
copyright and certain other rights. Our licenses are
irrevocable. Licensors should read and understand the terms
and conditions of the license they choose before applying it.
Licensors should also secure all rights necessary before
applying our licenses so that the public can reuse the
material as expected. Licensors should clearly mark any
material not subject to the license. This includes other CC-
licensed material, or material used under an exception or
limitation to copyright. More considerations for licensors:
wiki.creativecommons.org/Considerations_for_licensors
Considerations for the public: By using one of our public
licenses, a licensor grants the public permission to use the
licensed material under specified terms and conditions. If
the licensor's permission is not necessary for any reason--for
example, because of any applicable exception or limitation to
copyright--then that use is not regulated by the license. Our
licenses grant only permissions under copyright and certain
other rights that a licensor has authority to grant. Use of
the licensed material may still be restricted for other
reasons, including because others have copyright or other
rights in the material. A licensor may make special requests,
such as asking that all changes be marked or described.
Although not required by our licenses, you are encouraged to
respect those requests where reasonable. More_considerations
for the public:
wiki.creativecommons.org/Considerations_for_licensees
=======================================================================
Creative Commons Attribution 4.0 International Public License
By exercising the Licensed Rights (defined below), You accept and agree
to be bound by the terms and conditions of this Creative Commons
Attribution 4.0 International Public License ("Public License"). To the
extent this Public License may be interpreted as a contract, You are
granted the Licensed Rights in consideration of Your acceptance of
these terms and conditions, and the Licensor grants You such rights in
consideration of benefits the Licensor receives from making the
Licensed Material available under these terms and conditions.
Section 1 -- Definitions.
a. Adapted Material means material subject to Copyright and Similar
Rights that is derived from or based upon the Licensed Material
and in which the Licensed Material is translated, altered,
arranged, transformed, or otherwise modified in a manner requiring
permission under the Copyright and Similar Rights held by the
Licensor. For purposes of this Public License, where the Licensed
Material is a musical work, performance, or sound recording,
Adapted Material is always produced where the Licensed Material is
synched in timed relation with a moving image.
b. Adapter's License means the license You apply to Your Copyright
and Similar Rights in Your contributions to Adapted Material in
accordance with the terms and conditions of this Public License.
c. Copyright and Similar Rights means copyright and/or similar rights
closely related to copyright including, without limitation,
performance, broadcast, sound recording, and Sui Generis Database
Rights, without regard to how the rights are labeled or
categorized. For purposes of this Public License, the rights
specified in Section 2(b)(1)-(2) are not Copyright and Similar
Rights.
d. Effective Technological Measures means those measures that, in the
absence of proper authority, may not be circumvented under laws
fulfilling obligations under Article 11 of the WIPO Copyright
Treaty adopted on December 20, 1996, and/or similar international
agreements.
e. Exceptions and Limitations means fair use, fair dealing, and/or
any other exception or limitation to Copyright and Similar Rights
that applies to Your use of the Licensed Material.
f. Licensed Material means the artistic or literary work, database,
or other material to which the Licensor applied this Public
License.
g. Licensed Rights means the rights granted to You subject to the
terms and conditions of this Public License, which are limited to
all Copyright and Similar Rights that apply to Your use of the
Licensed Material and that the Licensor has authority to license.
h. Licensor means the individual(s) or entity(ies) granting rights
under this Public License.
i. Share means to provide material to the public by any means or
process that requires permission under the Licensed Rights, such
as reproduction, public display, public performance, distribution,
dissemination, communication, or importation, and to make material
available to the public including in ways that members of the
public may access the material from a place and at a time
individually chosen by them.
j. Sui Generis Database Rights means rights other than copyright
resulting from Directive 96/9/EC of the European Parliament and of
the Council of 11 March 1996 on the legal protection of databases,
as amended and/or succeeded, as well as other essentially
equivalent rights anywhere in the world.
k. You means the individual or entity exercising the Licensed Rights
under this Public License. Your has a corresponding meaning.
Section 2 -- Scope.
a. License grant.
1. Subject to the terms and conditions of this Public License,
the Licensor hereby grants You a worldwide, royalty-free,
non-sublicensable, non-exclusive, irrevocable license to
exercise the Licensed Rights in the Licensed Material to:
a. reproduce and Share the Licensed Material, in whole or
in part; and
b. produce, reproduce, and Share Adapted Material.
2. Exceptions and Limitations. For the avoidance of doubt, where
Exceptions and Limitations apply to Your use, this Public
License does not apply, and You do not need to comply with
its terms and conditions.
3. Term. The term of this Public License is specified in Section
6(a).
4. Media and formats; technical modifications allowed. The
Licensor authorizes You to exercise the Licensed Rights in
all media and formats whether now known or hereafter created,
and to make technical modifications necessary to do so. The
Licensor waives and/or agrees not to assert any right or
authority to forbid You from making technical modifications
necessary to exercise the Licensed Rights, including
technical modifications necessary to circumvent Effective
Technological Measures. For purposes of this Public License,
simply making modifications authorized by this Section 2(a)
(4) never produces Adapted Material.
5. Downstream recipients.
a. Offer from the Licensor -- Licensed Material. Every
recipient of the Licensed Material automatically
receives an offer from the Licensor to exercise the
Licensed Rights under the terms and conditions of this
Public License.
b. No downstream restrictions. You may not offer or impose
any additional or different terms or conditions on, or
apply any Effective Technological Measures to, the
Licensed Material if doing so restricts exercise of the
Licensed Rights by any recipient of the Licensed
Material.
6. No endorsement. Nothing in this Public License constitutes or
may be construed as permission to assert or imply that You
are, or that Your use of the Licensed Material is, connected
with, or sponsored, endorsed, or granted official status by,
the Licensor or others designated to receive attribution as
provided in Section 3(a)(1)(A)(i).
b. Other rights.
1. Moral rights, such as the right of integrity, are not
licensed under this Public License, nor are publicity,
privacy, and/or other similar personality rights; however, to
the extent possible, the Licensor waives and/or agrees not to
assert any such rights held by the Licensor to the limited
extent necessary to allow You to exercise the Licensed
Rights, but not otherwise.
2. Patent and trademark rights are not licensed under this
Public License.
3. To the extent possible, the Licensor waives any right to
collect royalties from You for the exercise of the Licensed
Rights, whether directly or through a collecting society
under any voluntary or waivable statutory or compulsory
licensing scheme. In all other cases the Licensor expressly
reserves any right to collect such royalties.
Section 3 -- License Conditions.
Your exercise of the Licensed Rights is expressly made subject to the
following conditions.
a. Attribution.
1. If You Share the Licensed Material (including in modified
form), You must:
a. retain the following if it is supplied by the Licensor
with the Licensed Material:
i. identification of the creator(s) of the Licensed
Material and any others designated to receive
attribution, in any reasonable manner requested by
the Licensor (including by pseudonym if
designated);
ii. a copyright notice;
iii. a notice that refers to this Public License;
iv. a notice that refers to the disclaimer of
warranties;
v. a URI or hyperlink to the Licensed Material to the
extent reasonably practicable;
b. indicate if You modified the Licensed Material and
retain an indication of any previous modifications; and
c. indicate the Licensed Material is licensed under this
Public License, and include the text of, or the URI or
hyperlink to, this Public License.
2. You may satisfy the conditions in Section 3(a)(1) in any
reasonable manner based on the medium, means, and context in
which You Share the Licensed Material. For example, it may be
reasonable to satisfy the conditions by providing a URI or
hyperlink to a resource that includes the required
information.
3. If requested by the Licensor, You must remove any of the
information required by Section 3(a)(1)(A) to the extent
reasonably practicable.
4. If You Share Adapted Material You produce, the Adapter's
License You apply must not prevent recipients of the Adapted
Material from complying with this Public License.
Section 4 -- Sui Generis Database Rights.
Where the Licensed Rights include Sui Generis Database Rights that
apply to Your use of the Licensed Material:
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
to extract, reuse, reproduce, and Share all or a substantial
portion of the contents of the database;
b. if You include all or a substantial portion of the database
contents in a database in which You have Sui Generis Database
Rights, then the database in which You have Sui Generis Database
Rights (but not its individual contents) is Adapted Material; and
c. You must comply with the conditions in Section 3(a) if You Share
all or a substantial portion of the contents of the database.
For the avoidance of doubt, this Section 4 supplements and does not
replace Your obligations under this Public License where the Licensed
Rights include other Copyright and Similar Rights.
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
c. The disclaimer of warranties and limitation of liability provided
above shall be interpreted in a manner that, to the extent
possible, most closely approximates an absolute disclaimer and
waiver of all liability.
Section 6 -- Term and Termination.
a. This Public License applies for the term of the Copyright and
Similar Rights licensed here. However, if You fail to comply with
this Public License, then Your rights under this Public License
terminate automatically.
b. Where Your right to use the Licensed Material has terminated under
Section 6(a), it reinstates:
1. automatically as of the date the violation is cured, provided
it is cured within 30 days of Your discovery of the
violation; or
2. upon express reinstatement by the Licensor.
For the avoidance of doubt, this Section 6(b) does not affect any
right the Licensor may have to seek remedies for Your violations
of this Public License.
c. For the avoidance of doubt, the Licensor may also offer the
Licensed Material under separate terms or conditions or stop
distributing the Licensed Material at any time; however, doing so
will not terminate this Public License.
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
License.
Section 7 -- Other Terms and Conditions.
a. The Licensor shall not be bound by any additional or different
terms or conditions communicated by You unless expressly agreed.
b. Any arrangements, understandings, or agreements regarding the
Licensed Material not stated herein are separate from and
independent of the terms and conditions of this Public License.
Section 8 -- Interpretation.
a. For the avoidance of doubt, this Public License does not, and
shall not be interpreted to, reduce, limit, restrict, or impose
conditions on any use of the Licensed Material that could lawfully
be made without permission under this Public License.
b. To the extent possible, if any provision of this Public License is
deemed unenforceable, it shall be automatically reformed to the
minimum extent necessary to make it enforceable. If the provision
cannot be reformed, it shall be severed from this Public License
without affecting the enforceability of the remaining terms and
conditions.
c. No term or condition of this Public License will be waived and no
failure to comply consented to unless expressly agreed to by the
Licensor.
d. Nothing in this Public License constitutes or may be interpreted
as a limitation upon, or waiver of, any privileges and immunities
that apply to the Licensor or You, including from the legal
processes of any jurisdiction or authority.
=======================================================================
Creative Commons is not a party to its public
licenses. Notwithstanding, Creative Commons may elect to apply one of
its public licenses to material it publishes and in those instances
will be considered the “Licensor.” The text of the Creative Commons
public licenses is dedicated to the public domain under the CC0 Public
Domain Dedication. Except for the limited purpose of indicating that
material is shared under a Creative Commons public license or as
otherwise permitted by the Creative Commons policies published at
creativecommons.org/policies, Creative Commons does not authorize the
use of the trademark "Creative Commons" or any other trademark or logo
of Creative Commons without its prior written consent including,
without limitation, in connection with any unauthorized modifications
to any of its public licenses or any other arrangements,
understandings, or agreements concerning use of licensed material. For
the avoidance of doubt, this paragraph does not form part of the
public licenses.
Creative Commons may be contacted at creativecommons.org.

482
skills/python-style-guide/SKILL.md Normal file

@@ -0,0 +1,482 @@
---
name: python-style-guide
description: Comprehensive Python programming guidelines based on Google's Python Style Guide. Use when Claude needs to write Python code, review Python code for style issues, refactor Python code, or provide Python programming guidance. Covers language rules (imports, exceptions, type annotations), style rules (naming conventions, formatting, docstrings), and best practices for clean, maintainable Python code.
license: Complete terms in LICENSE.txt
---
# Python Style Guide
Comprehensive guidelines for writing clean, maintainable Python code based on [Google's Python Style Guide](https://google.github.io/styleguide/pyguide.html).
## Core Philosophy
**BE CONSISTENT.** Match the style of the code around you. Use these guidelines as defaults, but always prioritize consistency with existing code.
## Language Rules
### Imports
Use `import` statements for packages and modules only, not for individual classes or functions.
**Yes:**
```python
from doctor.who import jodie
import sound_effects.utils
```
**No:**
```python
from sound_effects.utils import EffectsRegistry # Don't import classes directly
```
#### Import Formatting
- Group imports: standard library, third-party, application-specific
- Alphabetize within each group
- Use absolute imports (not relative imports)
- One import per line (except for multiple items from `typing` or `collections.abc`)
```python
# Standard library
import os
import sys
# Third-party
import numpy as np
import tensorflow as tf
# Application-specific
from myproject.backend import api_utils
```
### Exceptions
Use exceptions appropriately. Do not suppress errors with bare `except:` clauses.
**Yes:**
```python
try:
result = risky_operation()
except ValueError as e:
logging.error(f"Invalid value: {e}")
raise
```
**No:**
```python
try:
result = risky_operation()
except: # Too broad, hides bugs
pass
```
### Type Annotations
Annotate all function signatures. Type annotations improve code readability and catch errors early.
**General rules:**
- Annotate all public APIs
- Use built-in types (`list`, `dict`, `set`) instead of `typing.List`, etc. (Python 3.9+)
- Import typing symbols directly: `from typing import Any, Union`
- Use `None` instead of `type(None)` or `NoneType`
```python
def fetch_data(url: str, timeout: int = 30) -> dict[str, Any]:
"""Fetch data from URL."""
...
def process_items(items: list[str]) -> None:
"""Process a list of items."""
...
```
### Default Argument Values
Never use mutable objects as default values in function definitions.
**Yes:**
```python
def foo(a: int, b: list[int] | None = None) -> None:
if b is None:
b = []
```
**No:**
```python
def foo(a: int, b: list[int] = []) -> None: # Mutable default - WRONG!
b.append(a)
```
### True/False Evaluations
Use implicit false where possible. Empty sequences, `None`, and `0` are false in boolean contexts.
**Yes:**
```python
if not users: # Preferred
if not some_dict:
if value:
```
**No:**
```python
if len(users) == 0: # Verbose
if users == []:
if value == True: # Never compare to True/False explicitly
```
### Comprehensions & Generators
Use comprehensions and generators for simple cases. Keep them readable.
**Yes:**
```python
result = [x for x in data if x > 0]
squares = (x**2 for x in range(10))
```
**No:**
```python
# Too complex
result = [
x.strip().lower() for x in data
if x and len(x) > 5 and not x.startswith('#')
for y in x.split(',') if y
] # Use a regular loop instead
```
### Lambda Functions
Use lambdas for one-liners only. For anything complex, define a proper function.
**Yes:**
```python
sorted(data, key=lambda x: x.timestamp)
```
**Acceptable but prefer named function:**
```python
def get_timestamp(item):
return item.timestamp
sorted(data, key=get_timestamp)
```
## Style Rules
### Line Length
Maximum line length: 80 characters. Exceptions allowed for imports, URLs, and long strings that can't be broken.
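Long strings that would otherwise exceed the limit can usually be split with implicit concatenation inside parentheses; a minimal sketch:
```python
# Implicit string concatenation keeps each physical line under 80 characters.
message = (
    "This sentence is deliberately long enough that keeping it on one "
    "physical line would push it past the 80-character limit."
)
```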
### Indentation
Use 4 spaces per indentation level. Never use tabs.
For hanging indents, align wrapped elements vertically or use 4-space hanging indent:
```python
# Aligned with opening delimiter
foo = long_function_name(var_one, var_two,
var_three, var_four)
# Hanging indent (4 spaces)
foo = long_function_name(
var_one, var_two, var_three,
var_four)
```
### Blank Lines
- Two blank lines between top-level definitions
- One blank line between method definitions
- Use blank lines sparingly within functions to show logical sections
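A small sketch of these conventions (names are illustrative):
```python
import math


def circle_area(radius: float) -> float:
    """Returns the area of a circle."""
    return math.pi * radius**2


class Shape:
    """Base class for shapes."""

    def area(self) -> float:
        raise NotImplementedError

    def describe(self) -> str:
        return f"{type(self).__name__} with area {self.area():.2f}"
```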
### Naming Conventions
| Type | Convention | Examples |
|------|-----------|----------|
| Packages/Modules | `lower_with_under` | `my_module.py` |
| Classes | `CapWords` | `MyClass` |
| Functions/Methods | `lower_with_under()` | `my_function()` |
| Constants | `CAPS_WITH_UNDER` | `MAX_SIZE` |
| Variables | `lower_with_under` | `my_var` |
| Private | `_leading_underscore` | `_private_var` |
**Avoid:**
- Single character names except for counters/iterators (`i`, `j`, `k`)
- Dashes in any name
- `__double_leading_and_trailing_underscore__` (reserved for Python)
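Putting the conventions together (names here are illustrative only):
```python
# my_module.py  (module: lower_with_under)
MAX_RETRIES = 3  # constant: CAPS_WITH_UNDER

_default_timeout = 30  # module-private: _leading_underscore


class RequestHandler:  # class: CapWords
    def send_request(self, payload: dict) -> bool:  # method: lower_with_under
        for i in range(MAX_RETRIES):  # short counter name is fine
            ...
        return True
```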
### Comments and Docstrings
#### Docstring Format
Use Google-style docstrings for all public modules, functions, classes, and methods.
**Function docstring:**
```python
def fetch_smalltable_rows(
table_handle: smalltable.Table,
keys: Sequence[bytes | str],
require_all_keys: bool = False,
) -> Mapping[bytes, tuple[str, ...]]:
"""Fetches rows from a Smalltable.
Retrieves rows pertaining to the given keys from the Table instance
represented by table_handle. String keys will be UTF-8 encoded.
Args:
table_handle: An open smalltable.Table instance.
keys: A sequence of strings representing the key of each table
row to fetch. String keys will be UTF-8 encoded.
require_all_keys: If True, raise ValueError if any key is missing.
Returns:
A dict mapping keys to the corresponding table row data
fetched. Each row is represented as a tuple of strings.
Raises:
IOError: An error occurred accessing the smalltable.
ValueError: A key is missing and require_all_keys is True.
"""
...
```
**Class docstring:**
```python
class SampleClass:
"""Summary of class here.
Longer class information...
Longer class information...
Attributes:
likes_spam: A boolean indicating if we like SPAM or not.
eggs: An integer count of the eggs we have laid.
"""
def __init__(self, likes_spam: bool = False):
"""Initializes the instance based on spam preference.
Args:
likes_spam: Defines if instance exhibits this preference.
"""
self.likes_spam = likes_spam
self.eggs = 0
```
#### Block and Inline Comments
- Use complete sentences with proper capitalization
- Block comments indent to the same level as the code
- Inline comments should be separated by at least 2 spaces
- Use inline comments sparingly
```python
# Block comment explaining the following code.
# Can span multiple lines.
x = x + 1 # Inline comment (use sparingly)
```
### Strings
Use f-strings for formatting (Python 3.6+).
**Yes:**
```python
x = f"name: {name}; score: {score}"
```
**Acceptable:**
```python
x = "name: %s; score: %d" % (name, score)
x = "name: {}; score: {}".format(name, score)
```
**No:**
```python
x = "name: " + name + "; score: " + str(score) # Avoid + for formatting
```
#### Logging
Use `%` formatting for logging, not f-strings (allows lazy evaluation):
```python
logging.info("Request from %s resulted in %d", ip_address, status_code)
```
### Files and Resources
Always use context managers (`with` statements) for file operations:
```python
with open("file.txt") as f:
data = f.read()
```
### Statements
Generally avoid multiple statements on one line.
**Yes:**
```python
if foo:
bar()
```
**No:**
```python
if foo: bar() # Avoid
```
### Main
For executable scripts, use:
```python
def main():
...
if __name__ == "__main__":
main()
```
### Function Length
Keep functions focused and reasonably sized. If a function exceeds about 40 lines, consider splitting it unless it remains very readable.
## Type Annotation Details
### Forward Declarations
Use string quotes for forward references:
```python
class MyClass:
def method(self) -> "MyClass":
return self
```
### Type Aliases
Create aliases for complex types:
```python
from typing import TypeAlias
ConnectionOptions: TypeAlias = dict[str, str]
Address: TypeAlias = tuple[str, int]
Server: TypeAlias = tuple[Address, ConnectionOptions]
```
### TypeVars
Use descriptive names for TypeVars:
```python
from typing import TypeVar
_T = TypeVar("_T") # Good: private, unconstrained
AddableType = TypeVar("AddableType", int, float, str) # Good: descriptive
```
### Generics
Always specify type parameters for generic types:
**Yes:**
```python
def get_names(employee_ids: list[int]) -> dict[int, str]:
...
```
**No:**
```python
def get_names(employee_ids: list) -> dict: # Missing type parameters
...
```
### Imports for Typing
Import typing symbols directly:
```python
from collections.abc import Mapping, Sequence
from typing import Any, Union
# Use built-in types for containers (Python 3.9+)
def foo(items: list[str]) -> dict[str, int]:
...
```
## Common Patterns
### Properties
Use properties for simple attribute access:
```python
class Square:
def __init__(self, side: float):
self._side = side
@property
def area(self) -> float:
return self._side ** 2
```
### Conditional Expressions
Use ternary operators for simple conditions:
```python
x = "yes" if condition else "no"
```
### Context Managers
Create custom context managers when appropriate:
```python
from contextlib import contextmanager
@contextmanager
def managed_resource(*args, **kwargs):
resource = acquire_resource(*args, **kwargs)
try:
yield resource
finally:
release_resource(resource)
```
## Linting
Run `pylint` on all Python code. Suppress warnings only when necessary with clear explanations:
```python
dict = 'something' # pylint: disable=redefined-builtin
```
## Summary
When writing Python code:
1. Use type annotations for all functions
2. Follow naming conventions consistently
3. Write clear docstrings for all public APIs
4. Keep functions focused and reasonably sized
5. Use comprehensions for simple cases
6. Prefer implicit false in boolean contexts
7. Use f-strings for formatting
8. Always use context managers for resources
9. Run pylint and fix issues
10. **BE CONSISTENT** with existing code
## Additional Resources
For detailed reference on specific topics, see:
- **references/advanced_types.md** - Advanced type annotation patterns including Protocol, TypedDict, Literal, ParamSpec, and more
- **references/antipatterns.md** - Common Python mistakes and their fixes
- **references/docstring_examples.md** - Comprehensive docstring examples for all Python constructs

259
skills/python-style-guide/references/advanced_types.md Normal file

@@ -0,0 +1,259 @@
# Advanced Type Annotations Reference
This document provides detailed guidance on advanced type annotation patterns in Python.
## Union Types
Use `|` (union operator) for Python 3.10+ or `Union` for earlier versions:
```python
# Python 3.10+
def process(value: int | str) -> None:
...
# Python 3.9 and earlier
from typing import Union
def process(value: Union[int, str]) -> None:
...
```
## Optional Types
`Optional[X]` is shorthand for `X | None`:
```python
from typing import Optional
# These are equivalent:
def foo(x: Optional[int]) -> None: ...
def foo(x: int | None) -> None: ... # Preferred in Python 3.10+
```
## Callable Types
For function types, use `Callable`:
```python
from collections.abc import Callable
def apply_func(func: Callable[[int, int], int], x: int, y: int) -> int:
return func(x, y)
# Callable[[arg1_type, arg2_type], return_type]
```
For functions with variable arguments:
```python
# Use ... for variable arguments
def accepts_any_callable(func: Callable[..., int]) -> None:
...
```
## Sequence, Mapping, and Iterable
Use abstract types from `collections.abc` when you don't need specific container features:
```python
from collections.abc import Sequence, Mapping, Iterable
def process_items(items: Sequence[str]) -> None:
"""Works with lists, tuples, or any sequence."""
...
def process_mapping(data: Mapping[str, int]) -> None:
"""Works with dicts or any mapping."""
...
def sum_numbers(nums: Iterable[int]) -> int:
"""Works with any iterable."""
return sum(nums)
```
## Protocol and Structural Subtyping
Define structural types using `Protocol`:
```python
from typing import Protocol
class Drawable(Protocol):
def draw(self) -> None:
...
def render(obj: Drawable) -> None:
obj.draw() # Any object with a draw() method works
```
## TypedDict for Structured Dictionaries
Use `TypedDict` for dictionaries with known keys:
```python
from typing import TypedDict
class Employee(TypedDict):
name: str
id: int
department: str
def process_employee(emp: Employee) -> None:
print(emp["name"]) # Type checker knows this key exists
```
Optional fields:
```python
from typing import TypedDict, NotRequired
class Employee(TypedDict):
name: str
id: int
department: NotRequired[str] # Optional field
```
## Literal Types
Use `Literal` for specific values:
```python
from typing import Literal
def set_mode(mode: Literal["read", "write", "append"]) -> None:
...
# Type checker ensures only these values are passed
set_mode("read") # OK
set_mode("delete") # Error
```
## Generic Classes
Create generic classes with `Generic`:
```python
from typing import Generic, TypeVar
T = TypeVar("T")
class Stack(Generic[T]):
def __init__(self) -> None:
self._items: list[T] = []
def push(self, item: T) -> None:
self._items.append(item)
def pop(self) -> T:
return self._items.pop()
# Usage
int_stack: Stack[int] = Stack()
int_stack.push(42)
```
## ParamSpec for Higher-Order Functions
Use `ParamSpec` to preserve function signatures:
```python
from typing import ParamSpec, TypeVar, Callable
P = ParamSpec("P")
R = TypeVar("R")
def log_calls(func: Callable[P, R]) -> Callable[P, R]:
def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
print(f"Calling {func.__name__}")
return func(*args, **kwargs)
return wrapper
@log_calls
def greet(name: str, excited: bool = False) -> str:
return f"Hello, {name}{'!' if excited else '.'}"
# Type checker preserves the signature of greet
```
## TypeGuard for Type Narrowing
Use `TypeGuard` for custom type checking functions:
```python
from typing import TypeGuard
def is_str_list(val: list[object]) -> TypeGuard[list[str]]:
return all(isinstance(x, str) for x in val)
def process(items: list[object]) -> None:
if is_str_list(items):
# Type checker knows items is list[str] here
print(", ".join(items))
```
## Annotating *args and **kwargs
```python
def foo(*args: int, **kwargs: str) -> None:
# args is tuple[int, ...]
# kwargs is dict[str, str]
...
```
## Overload for Multiple Signatures
Use `@overload` for functions with different return types based on arguments:
```python
from typing import overload
@overload
def process(x: int) -> int: ...
@overload
def process(x: str) -> str: ...
def process(x: int | str) -> int | str:
if isinstance(x, int):
return x * 2
return x.upper()
```
## Self Type (Python 3.11+)
Use `Self` for methods that return the instance:
```python
from typing import Self

class Builder:
    def __init__(self) -> None:
        self.items: list[str] = []

    def add_item(self, item: str) -> Self:
        self.items.append(item)
        return self  # Return type is automatically the class type

    def build(self) -> dict:
        return {"items": self.items}
```
For Python < 3.11, use TypeVar:
```python
from typing import TypeVar

TBuilder = TypeVar("TBuilder", bound="Builder")

class Builder:
    def __init__(self) -> None:
        self.items: list[str] = []

    def add_item(self: TBuilder, item: str) -> TBuilder:
        self.items.append(item)
        return self
```
## Best Practices
1. Use the most general type that works (e.g., `Sequence` over `list`)
2. Use `Protocol` for duck typing
3. Use `TypedDict` for structured dictionaries
4. Use `Literal` to restrict to specific values
5. Use `TypeGuard` for custom type narrowing
6. Always annotate public APIs
7. Use `Any` sparingly and explicitly when needed
8. Prefer built-in generic types (`list`, `dict`) over `typing` equivalents (Python 3.9+)

361
skills/python-style-guide/references/antipatterns.md Normal file

@@ -0,0 +1,361 @@
# Python Anti-Patterns and Fixes
Common Python mistakes and their corrections.
## 1. Mutable Default Arguments
**Anti-pattern:**
```python
def add_item(item, items=[]): # WRONG
items.append(item)
return items
```
**Why it's wrong:** The list is created once when the function is defined, not each time it's called.
**Fix:**
```python
def add_item(item, items=None):
if items is None:
items = []
items.append(item)
return items
```
## 2. Bare Except Clauses
**Anti-pattern:**
```python
try:
risky_operation()
except: # WRONG - catches everything, including KeyboardInterrupt
handle_error()
```
**Fix:**
```python
try:
risky_operation()
except Exception as e: # Or specific exception types
logger.error(f"Operation failed: {e}")
handle_error()
```
## 3. Using == for None Comparisons
**Anti-pattern:**
```python
if value == None: # WRONG
...
```
**Fix:**
```python
if value is None:
...
```
**Why:** `is` checks identity, `==` checks equality. `None` is a singleton.
## 4. Comparing Boolean Values Explicitly
**Anti-pattern:**
```python
if flag == True: # WRONG
...
if len(items) > 0: # WRONG
...
```
**Fix:**
```python
if flag:
...
if items:
...
```
## 5. Not Using Context Managers for Files
**Anti-pattern:**
```python
f = open("file.txt") # WRONG - file may not close if error occurs
data = f.read()
f.close()
```
**Fix:**
```python
with open("file.txt") as f:
data = f.read()
```
## 6. String Concatenation in Loops
**Anti-pattern:**
```python
result = ""
for item in items:
result += str(item) # WRONG - creates new string each iteration
```
**Fix:**
```python
result = "".join(str(item) for item in items)
```
## 7. Modifying List While Iterating
**Anti-pattern:**
```python
for item in items:
if should_remove(item):
items.remove(item) # WRONG - skips elements
```
**Fix:**
```python
items = [item for item in items if not should_remove(item)]
# Or
items[:] = [item for item in items if not should_remove(item)]
```
## 8. Using eval() or exec()
**Anti-pattern:**
```python
user_input = get_user_input()
result = eval(user_input) # WRONG - major security risk
```
**Fix:**
```python
import ast
result = ast.literal_eval(user_input) # Only evaluates literals
```
## 9. Not Using enumerate()
**Anti-pattern:**
```python
i = 0
for item in items:
print(f"{i}: {item}")
i += 1
```
**Fix:**
```python
for i, item in enumerate(items):
print(f"{i}: {item}")
```
## 10. Creating Empty Lists/Dicts Unnecessarily
**Anti-pattern:**
```python
items = []
items.append(1)
items.append(2)
items.append(3)
```
**Fix:**
```python
items = [1, 2, 3]
```
## 11. Not Using dict.get() with Defaults
**Anti-pattern:**
```python
if key in my_dict:
value = my_dict[key]
else:
value = default
```
**Fix:**
```python
value = my_dict.get(key, default)
```
## 12. Using range(len()) Instead of enumerate()
**Anti-pattern:**
```python
for i in range(len(items)):
item = items[i]
print(f"{i}: {item}")
```
**Fix:**
```python
for i, item in enumerate(items):
print(f"{i}: {item}")
```
## 13. Not Using Collections Module
**Anti-pattern:**
```python
word_counts = {}
for word in words:
if word in word_counts:
word_counts[word] += 1
else:
word_counts[word] = 1
```
**Fix:**
```python
from collections import Counter
word_counts = Counter(words)
```
## 14. Not Using defaultdict
**Anti-pattern:**
```python
groups = {}
for item in items:
key = get_key(item)
if key not in groups:
groups[key] = []
groups[key].append(item)
```
**Fix:**
```python
from collections import defaultdict
groups = defaultdict(list)
for item in items:
key = get_key(item)
groups[key].append(item)
```
## 15. Overly Complex Comprehensions
**Anti-pattern:**
```python
result = [
transform(x)
for x in items
if condition1(x)
if condition2(x)
if condition3(x)
for y in x.sub_items
if condition4(y)
] # WRONG - too complex
```
**Fix:**
```python
result = []
for x in items:
if condition1(x) and condition2(x) and condition3(x):
for y in x.sub_items:
if condition4(y):
result.append(transform(x))
```
## 16. Not Using Path Objects
**Anti-pattern:**
```python
import os
path = os.path.join(dir_name, "file.txt")
if os.path.exists(path):
with open(path) as f:
...
```
**Fix:**
```python
from pathlib import Path
path = Path(dir_name) / "file.txt"
if path.exists():
with path.open() as f:
...
```
## 17. String Formatting with + or %
**Anti-pattern:**
```python
message = "Hello, " + name + "! You have " + str(count) + " messages."
message = "Hello, %s! You have %d messages." % (name, count)
```
**Fix:**
```python
message = f"Hello, {name}! You have {count} messages."
```
## 18. Not Using dataclasses
**Anti-pattern:**
```python
class Point:
def __init__(self, x, y):
self.x = x
self.y = y
def __repr__(self):
return f"Point(x={self.x}, y={self.y})"
def __eq__(self, other):
return self.x == other.x and self.y == other.y
```
**Fix:**
```python
from dataclasses import dataclass
@dataclass
class Point:
x: float
y: float
```
## 19. Lambda Abuse
**Anti-pattern:**
```python
process = lambda x: x.strip().lower().replace(" ", "_")[:20] # WRONG
```
**Fix:**
```python
def process(x: str) -> str:
"""Clean and truncate string."""
return x.strip().lower().replace(" ", "_")[:20]
```
## 20. Not Using Sets for Membership Testing
**Anti-pattern:**
```python
valid_codes = ["A1", "A2", "A3", ...] # Long list
if code in valid_codes: # O(n) lookup
...
```
**Fix:**
```python
valid_codes = {"A1", "A2", "A3", ...} # Set
if code in valid_codes: # O(1) lookup
...
```
## Summary
Key principles to avoid anti-patterns:
1. Use built-in functions and standard library when possible
2. Leverage context managers for resource management
3. Use appropriate data structures (sets for membership, Counter for counting)
4. Keep code readable and idiomatic
5. Use modern Python features (f-strings, dataclasses, Path)
6. Avoid premature optimization
7. Write explicit, clear code over clever code

384
skills/python-style-guide/references/docstring_examples.md Normal file

@@ -0,0 +1,384 @@
# Docstring Examples
Complete examples of Google-style docstrings for various Python constructs.
## Module Docstring
```python
"""This is an example module docstring.
This module provides utilities for processing user data. It includes functions
for validation, transformation, and persistence of user information.
Typical usage example:
user = create_user("John Doe", "john@example.com")
validate_user(user)
save_user(user)
"""
```
## Function Docstrings
### Simple Function
```python
def greet(name: str) -> str:
"""Returns a greeting message.
Args:
name: The name of the person to greet.
Returns:
A greeting string.
"""
return f"Hello, {name}!"
```
### Function with Multiple Arguments
```python
def calculate_total(
price: float,
quantity: int,
discount: float = 0.0,
tax_rate: float = 0.0
) -> float:
"""Calculates the total cost including discount and tax.
Args:
price: The unit price of the item.
quantity: The number of items.
discount: The discount as a decimal (e.g., 0.1 for 10% off).
Defaults to 0.0.
tax_rate: The tax rate as a decimal (e.g., 0.08 for 8% tax).
Defaults to 0.0.
Returns:
The total cost after applying discount and tax.
Raises:
ValueError: If price or quantity is negative.
"""
if price < 0 or quantity < 0:
raise ValueError("Price and quantity must be non-negative")
subtotal = price * quantity * (1 - discount)
return subtotal * (1 + tax_rate)
```
### Function with Complex Return Type
```python
def parse_config(
config_path: str
) -> tuple[dict[str, str], list[str]]:
"""Parses a configuration file.
Args:
config_path: Path to the configuration file.
Returns:
A tuple containing:
- A dictionary of configuration key-value pairs.
- A list of warning messages encountered during parsing.
Raises:
FileNotFoundError: If the config file doesn't exist.
ValueError: If the config file is malformed.
"""
...
```
### Function with Side Effects
```python
def update_database(
user_id: int,
data: dict[str, Any]
) -> None:
"""Updates user data in the database.
Note:
This function modifies the database directly. Ensure proper
transaction handling in the calling code.
Args:
user_id: The ID of the user to update.
data: Dictionary containing fields to update.
Raises:
DatabaseError: If the database operation fails.
ValueError: If user_id is invalid or data is empty.
"""
...
```
## Class Docstrings
### Simple Class
```python
class User:
"""Represents a user in the system.
Attributes:
username: The user's unique username.
email: The user's email address.
created_at: Timestamp when the user was created.
"""
def __init__(self, username: str, email: str):
"""Initializes a new User.
Args:
username: The desired username.
email: The user's email address.
"""
self.username = username
self.email = email
self.created_at = datetime.now()
```
### Complex Class with Properties
```python
class Rectangle:
"""Represents a rectangle with width and height.
This class provides methods for calculating area and perimeter,
and properties for accessing dimensions.
Attributes:
width: The width of the rectangle.
height: The height of the rectangle.
Example:
>>> rect = Rectangle(10, 5)
>>> rect.area
50
>>> rect.perimeter
30
"""
def __init__(self, width: float, height: float):
"""Initializes a Rectangle.
Args:
width: The width of the rectangle. Must be positive.
height: The height of the rectangle. Must be positive.
Raises:
ValueError: If width or height is not positive.
"""
if width <= 0 or height <= 0:
raise ValueError("Width and height must be positive")
self._width = width
self._height = height
@property
def width(self) -> float:
"""Gets the width of the rectangle."""
return self._width
@width.setter
def width(self, value: float) -> None:
"""Sets the width of the rectangle.
Args:
value: The new width. Must be positive.
Raises:
ValueError: If value is not positive.
"""
if value <= 0:
raise ValueError("Width must be positive")
self._width = value
@property
def area(self) -> float:
"""Calculates and returns the area of the rectangle."""
return self._width * self._height
@property
def perimeter(self) -> float:
"""Calculates and returns the perimeter of the rectangle."""
return 2 * (self._width + self._height)
```
## Generator Functions
```python
def fibonacci(n: int) -> Iterator[int]:
"""Generates the first n Fibonacci numbers.
Args:
n: The number of Fibonacci numbers to generate.
Yields:
The next Fibonacci number in the sequence.
Raises:
ValueError: If n is negative.
Example:
>>> list(fibonacci(5))
[0, 1, 1, 2, 3]
"""
if n < 0:
raise ValueError("n must be non-negative")
a, b = 0, 1
for _ in range(n):
yield a
a, b = b, a + b
```
## Exception Classes
```python
class InvalidUserError(Exception):
"""Raised when user data is invalid.
This exception is raised during user validation when the provided
data doesn't meet the required criteria.
Attributes:
username: The invalid username that caused the error.
message: Explanation of the validation failure.
"""
def __init__(self, username: str, message: str):
"""Initializes the exception.
Args:
username: The username that failed validation.
message: Description of why validation failed.
"""
self.username = username
self.message = message
super().__init__(f"{username}: {message}")
```
## Context Manager
```python
class DatabaseConnection:
"""Context manager for database connections.
Automatically handles connection setup and teardown.
Example:
>>> with DatabaseConnection("localhost", 5432) as conn:
... conn.execute("SELECT * FROM users")
"""
def __init__(self, host: str, port: int):
"""Initializes the database connection parameters.
Args:
host: The database host address.
port: The database port number.
"""
self.host = host
self.port = port
self._connection = None
def __enter__(self) -> "DatabaseConnection":
"""Establishes the database connection.
Returns:
The DatabaseConnection instance.
Raises:
ConnectionError: If connection cannot be established.
"""
self._connection = create_connection(self.host, self.port)
return self
def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
"""Closes the database connection.
Args:
exc_type: The exception type, if an exception occurred.
exc_val: The exception value, if an exception occurred.
exc_tb: The exception traceback, if an exception occurred.
Returns:
False to propagate exceptions, True to suppress them.
"""
if self._connection:
self._connection.close()
return False
```
## Async Functions
```python
async def fetch_data(url: str, timeout: float = 30.0) -> dict[str, Any]:
"""Asynchronously fetches data from a URL.
Args:
url: The URL to fetch data from.
timeout: Maximum time to wait for response in seconds.
Defaults to 30.0.
Returns:
A dictionary containing the fetched data.
Raises:
aiohttp.ClientError: If the request fails.
asyncio.TimeoutError: If the request times out.
Example:
>>> data = await fetch_data("https://api.example.com/data")
"""
async with aiohttp.ClientSession() as session:
async with session.get(url, timeout=timeout) as response:
return await response.json()
```
## Test Functions
```python
def test_user_creation():
"""Tests that User objects are created correctly.
This test verifies:
- Username is set correctly
- Email is set correctly
- created_at is set to current time
"""
user = User("john_doe", "john@example.com")
assert user.username == "john_doe"
assert user.email == "john@example.com"
assert isinstance(user.created_at, datetime)
```
## Docstring Sections
Common sections in Google-style docstrings:
- **Args:** Function/method parameters
- **Returns:** Return value description
- **Yields:** For generator functions
- **Raises:** Exceptions that may be raised
- **Attributes:** For classes, describes instance attributes
- **Example:** Usage examples
- **Note:** Important notes or warnings
- **Warning:** Critical warnings
- **Todo:** Planned improvements
- **See Also:** Related functions or classes
## Style Guidelines
1. Use triple double quotes (`"""`) for all docstrings
2. First line is a brief summary (one sentence, no period needed if one line)
3. Leave a blank line before sections (Args, Returns, etc.)
4. Capitalize section headers
5. Use descriptive style rather than imperative mood ("Returns" not "Return")
6. Be specific and concise
7. Include type information in Args and Returns when not obvious from annotations
8. Always document exceptions that can be raised
9. Include examples for complex functions
10. Keep line length under 80 characters where possible

214
skills/r-development/SKILL.md Normal file

@@ -0,0 +1,214 @@
---
name: r-development
description: Modern R development practices emphasizing tidyverse patterns (dplyr 1.1 and later, native pipe, join_by, .by grouping), rlang metaprogramming, performance optimization, and package development. Use when Claude needs to write R code, create R packages, optimize R performance, or provide R programming guidance.
---
# R Development
This skill provides comprehensive guidance for modern R development, emphasizing current best practices with tidyverse, performance optimization, and professional package development.
## Core Principles
1. **Use modern tidyverse patterns** - Prioritize dplyr 1.1+ features, native pipe, and current APIs
2. **Profile before optimizing** - Use profvis and bench to identify real bottlenecks
3. **Write readable code first** - Optimize only when necessary and after profiling
4. **Follow tidyverse style guide** - Consistent naming, spacing, and structure
## Modern Tidyverse Essentials
### Native Pipe (`|>` not `%>%`)
Always use native pipe `|>` instead of magrittr `%>%` (R 4.1+):
```r
# Modern
data |>
filter(year >= 2020) |>
summarise(mean_value = mean(value))
# Avoid legacy pipe
data %>% filter(year >= 2020)
```
### Join Syntax (dplyr 1.1+)
Use `join_by()` for all joins:
```r
# Modern join syntax with equality
transactions |>
inner_join(companies, by = join_by(company == id))
# Inequality joins
transactions |>
inner_join(companies, join_by(company == id, year >= since))
# Rolling joins (closest match)
transactions |>
inner_join(companies, join_by(company == id, closest(year >= since)))
```
Control match behavior:
```r
# Expect 1:1 matches
inner_join(x, y, by = join_by(id), multiple = "error")
# Ensure all rows match
inner_join(x, y, by = join_by(id), unmatched = "error")
```
### Per-Operation Grouping with `.by`
Use `.by` instead of `group_by() |> ... |> ungroup()`:
```r
# Modern approach (always returns ungrouped)
data |>
summarise(mean_value = mean(value), .by = category)
# Multiple grouping variables
data |>
summarise(total = sum(revenue), .by = c(company, year))
```
### Column Operations
Use modern column selection and transformation functions:
```r
# pick() for column selection in data-masking contexts
data |>
summarise(
n_x_cols = ncol(pick(starts_with("x"))),
n_y_cols = ncol(pick(starts_with("y")))
)
# across() for applying functions to multiple columns
data |>
summarise(across(where(is.numeric), mean, .names = "mean_{.col}"), .by = group)
# reframe() for multi-row results per group
data |>
reframe(quantiles = quantile(x, c(0.25, 0.5, 0.75)), .by = group)
```
## rlang Metaprogramming
For comprehensive rlang patterns, see [references/rlang-patterns.md](references/rlang-patterns.md).
### Quick Reference
- **`{{}}`** - Forward function arguments to data-masking functions
- **`!!`** - Inject single expressions or values
- **`!!!`** - Inject multiple arguments from a list
- **`.data[[]]`** - Access columns by name (character vectors)
- **`pick()`** - Select columns inside data-masking functions
Example function with embracing:
```r
my_summary <- function(data, group_var, summary_var) {
data |>
summarise(mean_val = mean({{ summary_var }}), .by = {{ group_var }})
}
```
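For column names passed as strings, use `.data[[ ]]`; a brief sketch (the function name `mean_of()` is illustrative):
```r
# Accepting a column name as a character string via the .data pronoun
mean_of <- function(data, col_name) {
  data |>
    dplyr::summarise(mean_val = mean(.data[[col_name]], na.rm = TRUE))
}

mean_of(mtcars, "mpg")
```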
## Performance Optimization
For detailed performance guidance, see [references/performance.md](references/performance.md).
### Key Strategies
1. **Profile first**: Use `profvis::profvis()` and `bench::mark()`
2. **Vectorize operations**: Avoid loops when vectorized alternatives exist
3. **Use dtplyr**: For large data operations (lazy evaluation with data.table backend)
4. **Parallel processing**: Use `furrr::future_map()` for parallelizable work
5. **Memory efficiency**: Pre-allocate, use appropriate data types
Quick example:
```r
# Profile code
profvis::profvis({
result <- data |>
complex_operation() |>
another_operation()
})
# Benchmark alternatives
bench::mark(
approach_1 = method1(data),
approach_2 = method2(data),
check = FALSE
)
```
## Package Development
For complete package development guidance, see [references/package-development.md](references/package-development.md).
### Quick Guidelines
**API Design:**
- Use `.by` parameter for per-operation grouping
- Use `{{}}` for column arguments
- Return tibbles consistently
- Validate user-facing function inputs thoroughly
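A minimal sketch tying these guidelines together (the function `summarise_by()` and its arguments are illustrative, not part of this skill):
```r
summarise_by <- function(data, cols, by) {
  # Validate user-facing input up front
  if (!is.data.frame(data)) {
    rlang::abort("`data` must be a data frame.")
  }
  data |>
    dplyr::summarise(
      dplyr::across({{ cols }}, \(x) mean(x, na.rm = TRUE)),  # {{ }} forwards column arguments
      .by = {{ by }}                                          # per-operation grouping
    ) |>
    tibble::as_tibble()                                       # return a tibble consistently
}
```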
**Dependencies:**
- Add dependencies for significant functionality gains
- Core tidyverse packages usually worth including: dplyr, purrr, stringr, tidyr
- Minimize dependencies for widely-used packages
**Testing:**
- Unit tests for individual functions
- Integration tests for workflows
- Test edge cases and error conditions
**Documentation:**
- Document all exported functions
- Provide usage examples
- Explain non-obvious parameter interactions
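A minimal sketch that ties these guidelines together; the function name `summarise_var`, its arguments, and the error message are illustrative, not taken from an existing package:
```r
#' Summarise one column, optionally per group
#' @param .data A data frame.
#' @param var <data-masked> Column to summarise.
#' @param .by <tidy-select> Optional grouping columns.
#' @export
summarise_var <- function(.data, var, .by = NULL) {
  if (!is.data.frame(.data)) {
    cli::cli_abort("{.arg .data} must be a data frame.")
  }
  .data |>
    dplyr::summarise(mean = mean({{ var }}, na.rm = TRUE), .by = {{ .by }}) |>
    tibble::as_tibble()
}
```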
## Common Migration Patterns
### Base R → Tidyverse
```r
# Data manipulation
subset(data, condition)        →  filter(data, condition)
data[order(data$x), ]          →  arrange(data, x)
aggregate(x ~ y, data, mean)   →  summarise(data, mean(x), .by = y)
# Functional programming
sapply(x, f)                   →  map(x, f)  # type-stable
lapply(x, f)                   →  map(x, f)
# Strings
grepl("pattern", text)         →  str_detect(text, "pattern")
gsub("old", "new", text)       →  str_replace_all(text, "old", "new")
```
### Old → New Tidyverse
```r
# Pipes
%>%                              →  |>
# Grouping
group_by() |> ... |> ungroup()   →  summarise(..., .by = x)
# Joins
by = c("a" = "b")                →  by = join_by(a == b)
# Reshaping
gather()/spread()                →  pivot_longer()/pivot_wider()
```
## Additional Resources
- **rlang patterns**: See [references/rlang-patterns.md](references/rlang-patterns.md) for comprehensive data-masking and metaprogramming guidance
- **Performance optimization**: See [references/performance.md](references/performance.md) for profiling, benchmarking, and optimization strategies
- **Package development**: See [references/package-development.md](references/package-development.md) for complete package creation guidance
- **Object systems**: See [references/object-systems.md](references/object-systems.md) for S3, S4, S7, R6, and vctrs guidance

View File

@@ -0,0 +1,310 @@
# Object-Oriented Programming in R
## S7: Modern OOP for New Projects
S7 combines S3 simplicity with S4 structure:
- Formal class definitions with automatic validation
- Compatible with existing S3 code
- Better error messages and discoverability
```r
# S7 class definition
Range <- new_class("Range",
properties = list(
start = class_double,
end = class_double
),
validator = function(self) {
if (self@end < self@start) {
"@end must be >= @start"
}
}
)
# Usage - constructor and property access
x <- Range(start = 1, end = 10)
x@start # 1
x@end <- 20 # automatic validation
# Methods
inside <- new_generic("inside", "x")
method(inside, Range) <- function(x, y) {
y >= x@start & y <= x@end
}
```
## OOP System Decision Matrix
### Decision Tree: What Are You Building?
#### 1. Vector-like Objects
**Use vctrs when:**
- ✓ Need data frame integration (columns/rows)
- ✓ Want type-stable vector operations
- ✓ Building factor-like, date-like, or numeric-like classes
- ✓ Need consistent coercion/casting behavior
- ✓ Working with existing tidyverse infrastructure
**Examples:** custom date classes, units, categorical data
```r
# Vector-like behavior in data frames
percent <- function(x = double()) new_vctr(vec_cast(x, double()), class = "percentage")
data.frame(x = 1:3, pct = percent(c(0.1, 0.2, 0.3))) # works seamlessly
# Type-stable operations
vec_c(percent(0.1), percent(0.2)) # predictable behavior
vec_cast(0.5, percent()) # explicit, safe casting
```
#### 2. General Objects (Complex Data Structures)
**Use S7 when:**
- ✓ NEW projects that need formal classes
- ✓ Want property validation and safe property access (@)
- ✓ Need multiple dispatch (beyond S3's double dispatch)
- ✓ Converting from S3 and want better structure
- ✓ Building class hierarchies with inheritance
- ✓ Want better error messages and discoverability
```r
# Complex validation needs
Range <- new_class("Range",
properties = list(start = class_double, end = class_double),
validator = function(self) {
if (self@end < self@start) "@end must be >= @start"
}
)
# Multiple dispatch needs
method(generic, list(ClassA, ClassB)) <- function(x, y) ...
# Class hierarchies with clear inheritance
Child <- new_class("Child", parent = Parent)
```
**Use S3 when:**
- ✓ Simple classes with minimal structure needs
- ✓ Maximum compatibility and minimal dependencies
- ✓ Quick prototyping or internal classes
- ✓ Contributing to existing S3-based ecosystems
- ✓ Performance is absolutely critical (minimal overhead)
```r
# Simple classes without complex needs
new_simple <- function(x) structure(x, class = "simple")
print.simple <- function(x, ...) cat("Simple:", x)
```
**Use S4 when:**
- ✓ Working in Bioconductor ecosystem
- ✓ Need complex multiple inheritance (S7 doesn't support this)
- ✓ Existing S4 codebase that works well
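For comparison with the S7 `Range` above, the S4 equivalents look roughly like this (a minimal sketch; real Bioconductor classes carry much more structure):
```r
setClass("Range",
  slots = c(start = "numeric", end = "numeric"),
  validity = function(object) {
    if (object@end < object@start) "end must be >= start" else TRUE
  }
)
setGeneric("inside", function(x, y) standardGeneric("inside"))
setMethod("inside", "Range", function(x, y) y >= x@start & y <= x@end)
inside(new("Range", start = 1, end = 10), 5) # TRUE
```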
**Use R6 when:**
- ✓ Need reference semantics (mutable objects)
- ✓ Building stateful objects
- ✓ Coming from OOP languages like Python/Java
- ✓ Need encapsulation and private methods
## Detailed S7 vs S3 Comparison
| Feature | S3 | S7 | When S7 wins |
|---------|----|----|---------------|
| **Class definition** | Informal (convention) | Formal (`new_class()`) | Need guaranteed structure |
| **Property access** | `$` or `attr()` (unsafe) | `@` (safe, validated) | Property validation matters |
| **Validation** | Manual, inconsistent | Built-in validators | Data integrity important |
| **Method discovery** | Hard to find methods | Clear method printing | Developer experience matters |
| **Multiple dispatch** | Limited (base generics) | Full multiple dispatch | Complex method dispatch needed |
| **Inheritance** | Informal, `NextMethod()` | Explicit `super()` | Predictable inheritance needed |
| **Migration cost** | - | Low (1-2 hours) | Want better structure |
| **Performance** | Fastest | ~Same as S3 | Performance difference negligible |
| **Compatibility** | Full S3 | Full S3 + S7 | Need both old and new patterns |
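The `super()` entry in the table is the one pattern not shown elsewhere in this file; a minimal sketch of S7 inheritance with an explicit parent call (class and generic names are illustrative):
```r
library(S7)
Animal <- new_class("Animal", properties = list(name = class_character))
Dog <- new_class("Dog", parent = Animal)
describe <- new_generic("describe", "x")
method(describe, Animal) <- function(x) paste0(x@name, " is an animal")
method(describe, Dog) <- function(x) {
  # Call the parent method explicitly, then extend it
  paste0(describe(super(x, to = Animal)), " and a dog")
}
describe(Dog(name = "Rex"))
#> [1] "Rex is an animal and a dog"
```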
## vctrs for Vector Classes
### Basic Vector Class
```r
# Constructor (low-level)
new_percent <- function(x = double()) {
vec_assert(x, double())
new_vctr(x, class = "pkg_percent")
}
# Helper (user-facing)
percent <- function(x = double()) {
x <- vec_cast(x, double())
new_percent(x)
}
# Format method
format.pkg_percent <- function(x, ...) {
paste0(vec_data(x) * 100, "%")
}
```
### Coercion Methods
```r
# Self-coercion
vec_ptype2.pkg_percent.pkg_percent <- function(x, y, ...) {
new_percent()
}
# With double
vec_ptype2.pkg_percent.double <- function(x, y, ...) double()
vec_ptype2.double.pkg_percent <- function(x, y, ...) double()
# Casting
vec_cast.pkg_percent.double <- function(x, to, ...) {
new_percent(x)
}
vec_cast.double.pkg_percent <- function(x, to, ...) {
vec_data(x)
}
```
## S3 Basics
### Creating S3 Classes
```r
# Constructor
new_myclass <- function(x, y) {
structure(
list(x = x, y = y),
class = "myclass"
)
}
# Methods
print.myclass <- function(x, ...) {
cat("myclass object\n")
cat("x:", x$x, "\n")
cat("y:", x$y, "\n")
}
summary.myclass <- function(object, ...) {
list(x = object$x, y = object$y)
}
```
### Generic Functions
```r
# Create generic
my_generic <- function(x, ...) {
UseMethod("my_generic")
}
# Default method
my_generic.default <- function(x, ...) {
stop("No method for class ", class(x))
}
# Specific method
my_generic.myclass <- function(x, ...) {
# Implementation
}
```
## R6 Classes
### Basic R6 Class
```r
library(R6)
MyClass <- R6Class("MyClass",
public = list(
x = NULL,
y = NULL,
initialize = function(x, y) {
self$x <- x
self$y <- y
},
add = function() {
self$x + self$y
}
),
private = list(
internal_value = NULL
)
)
# Usage
obj <- MyClass$new(1, 2)
obj$add() # 3
```
## Migration Strategy
### S3 → S7
Usually 1-2 hours work, keeps full compatibility:
```r
# S3 version
new_range <- function(start, end) {
structure(
list(start = start, end = end),
class = "range"
)
}
# S7 version
Range <- new_class("Range",
properties = list(
start = class_double,
end = class_double
)
)
```
### S4 → S7
A more involved migration; first evaluate whether the S4-specific features you rely on (such as complex multiple inheritance) are actually needed.
### Base R → vctrs
For vector-like classes, migrating to vctrs brings significant benefits in type stability and data frame integration.
### Combining Approaches
S7 classes can use vctrs principles internally for vector-like properties.
## When to Use Each System
### Use S7 for:
- New projects needing formal OOP
- Class validation and type safety
- Multiple dispatch
- Better developer experience
### Use vctrs for:
- Vector-like classes
- Data frame columns
- Type-stable operations
- Tidyverse integration
### Use S3 for:
- Simple classes
- Maximum compatibility
- Existing S3 ecosystems
- Quick prototypes
### Use S4 for:
- Bioconductor packages
- Complex multiple inheritance
- Existing S4 codebases
### Use R6 for:
- Mutable state
- Reference semantics
- Encapsulation needs
- Coming from OOP languages

View File

@@ -0,0 +1,393 @@
# Package Development
## Dependency Strategy
### When to Add Dependencies vs Base R
```r
# Add dependency when:
# Significant functionality gain
# Maintenance burden reduction
# User experience improvement
# Complex implementation (regex, dates, web)
# Use base R when:
# Simple utility functions
# Package will be widely used (minimize deps)
# Dependency is large for small benefit
# Base R solution is straightforward
# Example decisions:
str_detect(x, "pattern") # Worth stringr dependency
length(x) > 0 # Don't need purrr for this
parse_dates(x) # Worth lubridate dependency
x + 1 # Don't need dplyr for this
```
### Tidyverse Dependency Guidelines
```r
# Core tidyverse (usually worth it):
dplyr # Complex data manipulation
purrr # Functional programming, parallel
stringr # String manipulation
tidyr # Data reshaping
# Specialized tidyverse (evaluate carefully):
lubridate # If heavy date manipulation
forcats # If many categorical operations
readr # If specific file reading needs
ggplot2 # If package creates visualizations
# Heavy dependencies (use sparingly):
tidyverse # Meta-package, very heavy
shiny # Only for interactive apps
```
## API Design Patterns
### Function Design Strategy
```r
# Modern tidyverse API patterns
# 1. Use .by for per-operation grouping
my_summarise <- function(.data, ..., .by = NULL) {
# Support modern grouped operations
}
# 2. Use {{ }} for user-provided columns
my_select <- function(.data, cols) {
.data |> select({{ cols }})
}
# 3. Use ... for flexible arguments
my_mutate <- function(.data, ..., .by = NULL) {
.data |> mutate(..., .by = {{ .by }})
}
# 4. Return consistent types (tibbles, not data.frames)
my_function <- function(.data) {
result |> tibble::as_tibble()
}
```
### Input Validation Strategy
```r
# Validation level by function type:
# User-facing functions - comprehensive validation
user_function <- function(x, threshold = 0.5) {
# Check all inputs thoroughly
if (!is.numeric(x)) stop("x must be numeric")
if (!is.numeric(threshold) || length(threshold) != 1) {
stop("threshold must be a single number")
}
# ... function body
}
# Internal functions - minimal validation
.internal_function <- function(x, threshold) {
# Assume inputs are valid (document assumptions)
# Only check critical invariants
# ... function body
}
# Package functions with vctrs - type-stable validation
safe_function <- function(x, y) {
x <- vec_cast(x, double())
y <- vec_cast(y, double())
# Automatic type checking and coercion
}
```
## Error Handling Patterns
```r
# Good error messages - specific and actionable
if (length(x) == 0) {
cli::cli_abort(
"Input {.arg x} cannot be empty.",
"i" = "Provide a non-empty vector."
)
}
# Include function name in errors
validate_input <- function(x, call = caller_env()) {
if (!is.numeric(x)) {
cli::cli_abort("Input must be numeric", call = call)
}
}
# Use consistent error styling
# cli package for user-friendly messages
# rlang for developer tools
```
## When to Create Internal vs Exported Functions
### Export Function When:
```r
# Users will call it directly
# Other packages might want to extend it
# Part of the core package functionality
# Stable API that won't change often
# Example: main data processing functions
export_these <- function(.data, ...) {
# Comprehensive input validation
# Full documentation required
# Stable API contract
}
```
### Keep Function Internal When:
```r
# Implementation detail that may change
# Only used within package
# Complex implementation helpers
# Would clutter user-facing API
# Example: helper functions
.internal_helper <- function(x, y) {
# Minimal documentation
# Can change without breaking users
# Assume inputs are pre-validated
}
```
## Testing and Documentation Strategy
### Testing Levels
```r
# Unit tests - individual functions
test_that("function handles edge cases", {
expect_equal(my_func(c()), expected_empty_result)
expect_error(my_func(NULL), class = "my_error_class")
})
# Integration tests - workflow combinations
test_that("pipeline works end-to-end", {
result <- data |>
step1() |>
step2() |>
step3()
expect_s3_class(result, "expected_class")
})
# Property-based tests for package functions
test_that("function properties hold", {
# Test invariants across many inputs
})
```
### Testing rlang Functions
```r
# Test data-masking behavior
test_that("function supports data masking", {
result <- my_function(mtcars, cyl)
expect_equal(names(result), "mean_cyl")
# Test with expressions
result2 <- my_function(mtcars, cyl * 2)
expect_true("mean_cyl * 2" %in% names(result2))
})
# Test injection behavior
test_that("function supports injection", {
var <- "cyl"
result <- my_function(mtcars, !!sym(var))
expect_true(nrow(result) > 0)
})
```
### Documentation Priorities
```r
# Must document:
# All exported functions
# Complex algorithms or formulas
# Non-obvious parameter interactions
# Examples of typical usage
# Can skip documentation:
# Simple internal helpers
# Obvious parameter meanings
# Functions that just call other functions
```
### Documentation Tags for rlang
```r
#' @param var <[`data-masked`][dplyr::dplyr_data_masking]> Column to summarize
#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Additional grouping variables
#' @param cols <[`tidy-select`][dplyr::dplyr_tidy_select]> Columns to select
```
## Package Structure
### DESCRIPTION File
```r
Package: mypackage
Title: What the Package Does (One Line, Title Case)
Version: 0.1.0
Authors@R: person("First", "Last", email = "email@example.com", role = c("aut", "cre"))
Description: What the package does (one paragraph).
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Imports:
dplyr (>= 1.1.0),
rlang (>= 1.1.0),
cli
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
```
### NAMESPACE Management
Use roxygen2 for NAMESPACE management:
```r
# Import specific functions
#' @importFrom rlang := enquo enquos
#' @importFrom dplyr mutate filter
# Or import entire packages (use sparingly)
#' @import dplyr
```
### rlang Import Strategy
```r
# In DESCRIPTION:
Imports: rlang
# In NAMESPACE, import specific functions:
importFrom(rlang, ":=", enquo, enquos, expr)
# Or import key functions:
#' @importFrom rlang := enquo enquos
```
## Naming Conventions
```r
# Good naming: snake_case for variables/functions
calculate_mean_score <- function(data, score_col) {
# Function body
}
# Prefix non-standard arguments with .
my_function <- function(.data, ...) {
# Reduces argument conflicts
}
# Internal functions start with .
.internal_helper <- function(x, y) {
# Not exported
}
```
## Style Guide Essentials
### Object Names
- Use snake_case for all names
- Variable names = nouns, function names = verbs
- Avoid dots except for S3 methods
```r
# Good
day_one
calculate_mean
user_data
# Avoid
DayOne
calculate.mean
userData
```
### Spacing and Layout
```r
# Good spacing
x[, 1]
mean(x, na.rm = TRUE)
if (condition) {
action()
}
# Pipe formatting
data |>
filter(year >= 2020) |>
group_by(category) |>
summarise(
mean_value = mean(value),
count = n()
)
```
## Package Development Workflow
1. **Setup**: Use `usethis::create_package()`
2. **Add functions**: Place in `R/` directory
3. **Document**: Use roxygen2 comments
4. **Test**: Write tests in `tests/testthat/`
5. **Check**: Run `devtools::check()`
6. **Build**: Use `devtools::build()`
7. **Install**: Use `devtools::install()`
### Key usethis Functions
```r
# Initial setup
usethis::create_package("mypackage")
usethis::use_git()
usethis::use_mit_license()
# Add dependencies
usethis::use_package("dplyr")
usethis::use_package("testthat", "Suggests")
# Add infrastructure
usethis::use_readme_md()
usethis::use_news_md()
usethis::use_testthat()
# Add files
usethis::use_r("my_function")
usethis::use_test("my_function")
usethis::use_vignette("introduction")
```
## Common Pitfalls
### What to Avoid
```r
# Don't use library() in packages
# Use Imports in DESCRIPTION instead
# Don't use source()
# Use proper function dependencies
# Don't use attach()
# Always use explicit :: notation
# Don't modify global options without restoring
old <- options(stringsAsFactors = FALSE)
on.exit(options(old), add = TRUE)
# Don't use setwd()
# Use here::here() or relative paths
```

View File

@@ -0,0 +1,311 @@
# Performance Optimization
## Performance Tool Selection Guide
### Profiling Tools Decision Matrix
| Tool | Use When | Don't Use When | What It Shows |
|------|----------|----------------|---------------|
| **`profvis`** | Complex code, unknown bottlenecks | Simple functions, known issues | Time per line, call stack |
| **`bench::mark()`** | Comparing alternatives | Single approach | Relative performance, memory |
| **`system.time()`** | Quick checks | Detailed analysis | Total runtime only |
| **`Rprof()`** | Base R only environments | When profvis available | Raw profiling data |
### Step-by-Step Performance Workflow
```r
# 1. Profile first - find the actual bottlenecks
library(profvis)
profvis({
# Your slow code here
})
# 2. Focus on the slowest parts (80/20 rule)
# Don't optimize until you know where time is spent
# 3. Benchmark alternatives for hot spots
library(bench)
bench::mark(
current = current_approach(data),
vectorized = vectorized_approach(data),
parallel = map(data, in_parallel(func))
)
# 4. Consider tool trade-offs based on bottleneck type
```
## When Each Tool Helps vs Hurts
### Parallel Processing (`in_parallel()`)
```r
# Helps when:
# CPU-intensive computations
# Embarrassingly parallel problems
# Large datasets with independent operations
# I/O-bound operations (file reading, API calls)
# Hurts when:
# Simple, fast operations (overhead > benefit)
# Memory-intensive operations (may cause thrashing)
# Operations requiring shared state
# Small datasets
# Example decision point:
expensive_func <- function(x) Sys.sleep(0.1) # 100ms per call
fast_func <- function(x) x^2 # microseconds per call
# Good for parallel
map(1:100, in_parallel(expensive_func)) # ~10s -> ~2.5s on 4 cores
# Bad for parallel (overhead > benefit)
map(1:100, in_parallel(fast_func)) # 100μs -> 50ms (500x slower!)
```
### vctrs Backend Tools
```r
# Use vctrs when:
# Type safety matters more than raw speed
# Building reusable package functions
# Complex coercion/combination logic
# Consistent behavior across edge cases
# Avoid vctrs when:
# One-off scripts where speed matters most
# Simple operations where base R is sufficient
# Memory is extremely constrained
# Decision point:
simple_combine <- function(x, y) c(x, y) # Fast, simple
robust_combine <- function(x, y) vec_c(x, y) # Safer, slight overhead
# Use simple for hot loops, robust for package APIs
```
### Data Backend Selection
```r
# Use data.table when:
# Very large datasets (>1GB)
# Complex grouping operations
# Reference semantics desired
# Maximum performance critical
# Use dplyr when:
# Readability and maintainability priority
# Complex joins and window functions
# Team familiarity with tidyverse
# Moderate-sized data (<100MB)
# Use dtplyr (dplyr with data.table backend) when:
# Want dplyr syntax with data.table performance
# Large data but team prefers tidyverse
# Lazy evaluation desired
# Use base R when:
# No dependencies allowed
# Simple operations
# Teaching/learning contexts
```
## Profiling Best Practices
```r
# 1. Profile realistic data sizes
profvis({
# Use actual data size, not toy examples
real_data |> your_analysis()
})
# 2. Profile multiple runs for stability
bench::mark(
your_function(data),
min_iterations = 10, # Multiple runs
max_iterations = 100
)
# 3. Check memory usage too
bench::mark(
approach1 = method1(data),
approach2 = method2(data),
check = FALSE, # If outputs differ slightly
filter_gc = FALSE # Include GC time
)
# 4. Profile with realistic usage patterns
# Not just isolated function calls
```
## Performance Anti-Patterns to Avoid
```r
# Don't optimize without measuring
# ✗ "This looks slow" -> immediately rewrite
# ✓ Profile first, optimize bottlenecks
# Don't over-engineer for performance
# ✗ Complex optimizations for 1% gains
# ✓ Focus on algorithmic improvements
# Don't assume - measure
# ✗ "for loops are always slow in R"
# ✓ Benchmark your specific use case
# Don't ignore readability costs
# ✗ Unreadable code for minor speedups
# ✓ Readable code with targeted optimizations
# Don't grow objects in loops
# ✗ result <- c(); for(i in 1:n) result <- c(result, x[i])
# ✓ result <- vector("list", n); for(i in 1:n) result[[i]] <- x[i]
```
## Modern purrr Patterns for Performance
Use modern purrr 1.0+ patterns:
```r
# Modern data frame row binding (purrr 1.0+)
models <- data_splits |>
map(\(split) train_model(split)) |>
list_rbind() # Replaces map_dfr()
# Column binding
summaries <- data_list |>
map(\(df) get_summary_stats(df)) |>
list_cbind() # Replaces map_dfc()
# Side effects with walk()
walk2(data_list, plot_names, \(df, name) {
p <- ggplot(df, aes(x, y)) + geom_point()
ggsave(name, p)
})
# Parallel processing (purrr 1.1.0+)
library(mirai)
daemons(4)
results <- large_datasets |>
map(in_parallel(expensive_computation))
daemons(0)
```
## Vectorization
```r
# Good - vectorized operations
result <- x + y
# Good - Type-stable purrr functions
map_dbl(data, mean) # always returns double
map_chr(data, class) # always returns character
# Avoid - Type-unstable base functions
sapply(data, mean) # might return list or vector
# Avoid - explicit loops for simple operations
result <- numeric(length(x))
for(i in seq_along(x)) {
result[i] <- x[i] + y[i]
}
```
## Using dtplyr for Large Data
For large datasets, use dtplyr to get data.table performance with dplyr syntax:
```r
library(dtplyr)
# Convert to lazy data.table
large_data_dt <- lazy_dt(large_data)
# Use dplyr syntax as normal
result_query <- large_data_dt |>
  filter(year >= 2020) |>
  group_by(category) |>
  summarise(
    total = sum(value),
    avg = mean(value)
  )
# See the generated data.table code before materializing
result_query |> show_query()
# Materialize the result as a tibble
result <- result_query |> as_tibble()
```
## Memory Optimization
```r
# Pre-allocate vectors
result <- vector("numeric", n)
# Use appropriate data types
# integer instead of double when possible
x <- 1:1000 # integer
y <- seq(1, 1000, by = 1) # double
# Remove large objects when done
rm(large_object)
gc() # Force garbage collection if needed
# Use data.table for large data
library(data.table)
dt <- as.data.table(large_df)
dt[, new_col := old_col * 2] # Modifies in place
```
## String Manipulation Performance
Use stringr over base R for consistency and performance:
```r
# Good - stringr (consistent, pipe-friendly)
text |>
str_to_lower() |>
str_trim() |>
str_replace_all("pattern", "replacement") |>
str_extract("\\d+")
# Common patterns
str_detect(text, "pattern") # vs grepl("pattern", text)
str_extract(text, "pattern") # vs complex regmatches()
str_replace_all(text, "a", "b") # vs gsub("a", "b", text)
str_split(text, ",") # vs strsplit(text, ",")
str_length(text) # vs nchar(text)
str_sub(text, 1, 5) # vs substr(text, 1, 5)
```
## When to Use vctrs
### Core Benefits
- **Type stability** - Predictable output types regardless of input values
- **Size stability** - Predictable output sizes from input sizes
- **Consistent coercion rules** - Single set of rules applied everywhere
- **Robust class design** - Proper S3 vector infrastructure
### Use vctrs when:
```r
# Type-Stable Functions in Packages
my_function <- function(x, y) {
# Always returns double, regardless of input values
vec_cast(result, double())
}
# Consistent Coercion/Casting
vec_cast(x, double()) # Clear intent, predictable behavior
vec_ptype_common(x, y, z) # Finds richest compatible type
# Size/Length Stability
vec_c(x, y) # size = vec_size(x) + vec_size(y)
vec_rbind(df1, df2) # size = sum of input sizes
```
### Don't Use vctrs When:
- Simple one-off analyses - Base R is sufficient
- No custom classes needed - Standard types work fine
- Performance critical + simple operations - Base R may be faster
- External API constraints - Must return base R types
The key insight: **vctrs is most valuable in package development where type safety, consistency, and extensibility matter more than raw speed for simple operations.**

View File

@@ -0,0 +1,247 @@
# rlang Patterns for Data-Masking
## Core Concepts
**Data-masking** allows R expressions to refer to data frame columns as if they were variables in the environment. rlang provides the metaprogramming framework that powers tidyverse data-masking.
### Key rlang Tools
- **Embracing `{{}}`** - Forward function arguments to data-masking functions
- **Injection `!!`** - Inject single expressions or values
- **Splicing `!!!`** - Inject multiple arguments from a list
- **Dynamic dots** - Programmable `...` with injection support
- **Pronouns `.data`/`.env`** - Explicit disambiguation between data and environment variables
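For orientation, a data-masked call and its base R equivalent side by side (using `mtcars` purely as an example):
```r
# Data-masked: columns are referenced as if they were ordinary variables
mtcars |> dplyr::filter(cyl == 6, mpg > 20)
# Base R equivalent without masking: every column access is spelled out
mtcars[mtcars$cyl == 6 & mtcars$mpg > 20, ]
```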
## Function Argument Patterns
### Forwarding with `{{}}`
Use `{{}}` to forward function arguments to data-masking functions:
```r
# Single argument forwarding
my_summarise <- function(data, var) {
data |> dplyr::summarise(mean = mean({{ var }}))
}
# Works with any data-masking expression
mtcars |> my_summarise(cyl)
mtcars |> my_summarise(cyl * am)
mtcars |> my_summarise(.data$cyl) # pronoun syntax supported
```
### Forwarding `...`
No special syntax needed for dots forwarding:
```r
# Simple dots forwarding
my_group_by <- function(.data, ...) {
.data |> dplyr::group_by(...)
}
# Works with tidy selections too
my_select <- function(.data, ...) {
.data |> dplyr::select(...)
}
# For single-argument tidy selections, wrap in c()
my_pivot_longer <- function(.data, ...) {
.data |> tidyr::pivot_longer(c(...))
}
```
### Names Patterns with `.data`
Use `.data` pronoun for programmatic column access:
```r
# Single column by name
my_mean <- function(data, var) {
data |> dplyr::summarise(mean = mean(.data[[var]]))
}
# Usage - completely insulated from data-masking
mtcars |> my_mean("cyl") # No ambiguity, works like regular function
# Multiple columns with all_of()
my_select_vars <- function(data, vars) {
data |> dplyr::select(all_of(vars))
}
mtcars |> my_select_vars(c("cyl", "am"))
```
## Injection Operators
### When to Use Each Operator
| Operator | Use Case | Example |
|----------|----------|---------|
| `{{ }}` | Forward function arguments | `summarise(mean = mean({{ var }}))` |
| `!!` | Inject single expression/value | `summarise(mean = mean(!!sym(var)))` |
| `!!!` | Inject multiple arguments | `group_by(!!!syms(vars))` |
| `.data[[]]` | Access columns by name | `mean(.data[[var]])` |
### Advanced Injection with `!!`
```r
# Create symbols from strings
var <- "cyl"
mtcars |> dplyr::summarise(mean = mean(!!sym(var)))
# Inject values to avoid name collisions
df <- data.frame(x = 1:3)
x <- 100
df |> dplyr::mutate(scaled = x / !!x) # Uses both data and env x
# Use data_sym() for tidyeval contexts (more robust)
mtcars |> dplyr::summarise(mean = mean(!!data_sym(var)))
```
### Splicing with `!!!`
```r
# Multiple symbols from character vector
vars <- c("cyl", "am")
mtcars |> dplyr::group_by(!!!syms(vars))
# Or use data_syms() for tidy contexts
mtcars |> dplyr::group_by(!!!data_syms(vars))
# Splice lists of arguments
args <- list(na.rm = TRUE, trim = 0.1)
mtcars |> dplyr::summarise(mean = mean(cyl, !!!args))
```
## Dynamic Dots Patterns
### Using `list2()` for Dynamic Dots Support
```r
my_function <- function(...) {
# Collect with list2() instead of list() for dynamic features
dots <- list2(...)
# Process dots...
}
# Enables these features:
my_function(a = 1, b = 2) # Normal usage
my_function(!!!list(a = 1, b = 2)) # Splice a list
my_function("{name}" := value) # Name injection
my_function(a = 1, ) # Trailing commas OK
```
### Name Injection with Glue Syntax
```r
# Basic name injection
name <- "result"
list2("{name}" := 1) # Creates list(result = 1)
# In function arguments with {{
my_mean <- function(data, var) {
data |> dplyr::summarise("mean_{{ var }}" := mean({{ var }}))
}
mtcars |> my_mean(cyl) # Creates column "mean_cyl"
mtcars |> my_mean(cyl * am) # Creates column "mean_cyl * am"
# Allow custom names with englue()
my_mean <- function(data, var, name = englue("mean_{{ var }}")) {
data |> dplyr::summarise("{name}" := mean({{ var }}))
}
# User can override default
mtcars |> my_mean(cyl, name = "cylinder_mean")
```
## Pronouns for Disambiguation
### `.data` and `.env` Best Practices
```r
# Explicit disambiguation prevents masking issues
cyl <- 1000 # Environment variable
mtcars |> dplyr::summarise(
data_cyl = mean(.data$cyl), # Data frame column
env_cyl = mean(.env$cyl), # Environment variable
ambiguous = mean(cyl) # Could be either (usually data wins)
)
# Use in loops and programmatic contexts
vars <- c("cyl", "am")
for (var in vars) {
result <- mtcars |> dplyr::summarise(mean = mean(.data[[var]]))
print(result)
}
```
## Programming Patterns
### Bridge Patterns
Converting between data-masking and tidy selection behaviors:
```r
# across() as selection-to-data-mask bridge
my_group_by <- function(data, vars) {
data |> dplyr::group_by(across({{ vars }}))
}
# Works with tidy selection
mtcars |> my_group_by(starts_with("c"))
# across(all_of()) as names-to-data-mask bridge
my_group_by <- function(data, vars) {
data |> dplyr::group_by(across(all_of(vars)))
}
mtcars |> my_group_by(c("cyl", "am"))
```
### Transformation Patterns
```r
# Transform single arguments by wrapping
my_mean <- function(data, var) {
data |> dplyr::summarise(mean = mean({{ var }}, na.rm = TRUE))
}
# Transform dots with across()
my_means <- function(data, ...) {
data |> dplyr::summarise(across(c(...), ~ mean(.x, na.rm = TRUE)))
}
# Manual transformation (advanced)
my_means_manual <- function(.data, ...) {
vars <- enquos(..., .named = TRUE)
vars <- purrr::map(vars, ~ expr(mean(!!.x, na.rm = TRUE)))
.data |> dplyr::summarise(!!!vars)
}
```
## Common Patterns Summary
### When to Use What
**Use `{{}}` when:**
- Forwarding user-provided column references
- Building wrapper functions around dplyr/tidyr
- Need to support both bare names and expressions
**Use `.data[[]]` when:**
- Working with character vector column names
- Iterating over column names programmatically
- Need complete insulation from data-masking
**Use `!!` when:**
- Need to inject computed expressions
- Converting strings to symbols with `sym()`
- Avoiding variable name collisions
**Use `!!!` when:**
- Injecting multiple arguments from a list
- Working with variable numbers of columns
- Splicing named arguments

View File

@@ -0,0 +1 @@
Adapted from [this repo](https://github.com/einverne/dotfiles/tree/4112dbe69457a07f7e25d046de13fbc4975dfeef/.claude/skills/shell-scripting)

View File

@@ -0,0 +1,140 @@
---
name: shell-scripting
description: Specialized knowledge of Bash and Zsh scripting, shell automation, command-line tools, and scripting best practices. Use when the user needs to write, debug, or optimize shell scripts, work with command-line tools, automate tasks with bash/zsh, or asks for shell script help.
---
# Shell Scripting Expert
Expert guidance for writing robust, maintainable Bash and Zsh scripts with best practices for automation and command-line tool usage.
## Script Structure Essentials
Start every script with:
```bash
#!/usr/bin/env bash
set -euo pipefail
IFS=$'\n\t'
```
- `set -e`: Exit on error
- `set -u`: Error on undefined variables
- `set -o pipefail`: Catch errors in pipes
- `IFS=$'\n\t'`: Safer word splitting
## Critical Best Practices
1. **Always quote variables**: `"$variable"` not `$variable`
2. **Use `[[` for conditionals** (Bash): `if [[ "$var" == "value" ]]; then`
3. **Check command existence**: `if command -v git &> /dev/null; then`
4. **Avoid parsing `ls`**: Use globs or `find` instead
5. **Use arrays for lists**: `files=("file1" "file2")` not space-separated strings
6. **Handle errors with traps**:
```bash
trap cleanup EXIT
trap 'echo "Error on line $LINENO"' ERR
```
## Common Patterns
### Argument Parsing
```bash
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help) usage; exit 0 ;;
-v|--verbose) VERBOSE=true; shift ;;
-*) echo "Unknown option: $1"; exit 1 ;;
*) break ;;
esac
done
```
### Safe File Iteration
```bash
# Prefer this (handles spaces, newlines correctly):
while IFS= read -r -d '' file; do
echo "Processing: $file"
done < <(find . -type f -name "*.txt" -print0)
# Or with simple globs:
for file in *.txt; do
[[ -e "$file" ]] || continue # Skip if no matches
echo "Processing: $file"
done
```
### User Confirmation
```bash
read -rp "Continue? [y/N] " response
if [[ "$response" =~ ^[Yy]$ ]]; then
echo "Continuing..."
fi
```
### Colored Output
```bash
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
echo -e "${GREEN}Success${NC}"
echo -e "${RED}Error${NC}" >&2
```
## Modern Tool Alternatives
When appropriate, suggest these modern replacements:
- `ripgrep` (rg) → faster than grep
- `fd` → faster than find
- `fzf` → interactive filtering
- `jq` → JSON processing
- `yq` → YAML processing
- `bat` → cat with syntax highlighting
- `eza` → enhanced ls
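A few illustrative one-liners, assuming the tools are installed (the paths and API URL are placeholders):
```bash
# ripgrep: recursive search with line numbers, respecting .gitignore
rg -n "TODO" src/
# fd: find Markdown files while skipping a directory
fd -e md --exclude node_modules
# fzf: pick a file interactively and open it in the editor
"${EDITOR:-vi}" "$(fzf)"
# jq: pull one field out of a JSON response
curl -s https://api.example.com/user | jq -r '.name'
```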
## Function Organization
```bash
usage() {
cat <<EOF
Usage: ${0##*/} [OPTIONS] <args>
Description of what this script does.
OPTIONS:
-h, --help Show this help
-v, --verbose Verbose output
EOF
}
main() {
# Main logic here
:
}
```
## Zsh-Specific Features
When the user specifies Zsh, highlight these features (a short combined example follows the list):
- Advanced globbing: `**/*.txt` (recursive), `*.txt~*test*` (exclude pattern)
- Parameter expansion: `${var:u}` (uppercase), `${var:l}` (lowercase)
- Associative arrays: `typeset -A hash; hash[key]=value`
- Extended globbing: Enable with `setopt extended_glob`
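A compact sketch exercising these Zsh features (names and values are illustrative):
```zsh
#!/usr/bin/env zsh
setopt extended_glob
# Recursive glob with exclusion: every .txt file except those matching *test*
print -l **/*.txt~*test*
# Case conversion via parameter expansion flags
name="Report"
print "${name:u}"   # REPORT
print "${name:l}"   # report
# Associative array
typeset -A config
config[host]=localhost
config[port]=8080
print "${config[host]}:${config[port]}"
```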
## Security Considerations
- **Never** `eval` untrusted input
- Validate user input before use
- Use `mktemp` for temporary files: `TEMP_FILE=$(mktemp)`
- Be explicit with `rm -rf` operations
- Check for TOCTOU (Time-Of-Check-Time-Of-Use) race conditions
- Don't store secrets in scripts; use environment variables or secret managers
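A small sketch of the validation, temp-file, and secrets points (the allowed pattern and variable names are illustrative):
```bash
#!/usr/bin/env bash
set -euo pipefail
# Validate user input against an allowlist pattern instead of trusting it
read -rp "Environment name (letters, digits, dashes): " env_name
if [[ ! "$env_name" =~ ^[A-Za-z0-9-]+$ ]]; then
    echo "Invalid environment name: $env_name" >&2
    exit 1
fi
# Temporary file created safely and removed on exit
TEMP_FILE=$(mktemp)
trap 'rm -f "$TEMP_FILE"' EXIT
# Secrets come from the environment, never hard-coded in the script
: "${API_TOKEN:?API_TOKEN must be set in the environment}"
```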
## Performance Tips
- Use built-ins over external commands (`[[ ]]` vs `test`, `$(( ))` vs `expr`)
- Avoid unnecessary subshells: `var=$(cat file)` → `var=$(<file)`
- Use `read` not `cat | while`: `while read -r line; do ... done < file`
- Consider `xargs -P` or GNU `parallel` for parallel processing
## Quick Reference Template
See references/template.sh for a complete, production-ready script template with all best practices incorporated.

View File

@@ -0,0 +1,505 @@
# Common Shell Scripting Patterns
This reference contains frequently-used patterns and solutions for shell scripting challenges.
## File and Directory Operations
### Check if file/directory exists
```bash
if [[ -f "$file" ]]; then
echo "File exists"
fi
if [[ -d "$dir" ]]; then
echo "Directory exists"
fi
# Check if file exists and is not empty
if [[ -s "$file" ]]; then
echo "File exists and has content"
fi
```
### Create directory if it doesn't exist
```bash
mkdir -p "$dir"
```
### Find and process files safely
```bash
# Method 1: Using find with null delimiter (safest)
while IFS= read -r -d '' file; do
echo "Processing: $file"
done < <(find . -type f -name "*.txt" -print0)
# Method 2: Using find with -exec
find . -type f -name "*.txt" -exec process_function {} \;
# Method 3: Simple glob (when no subdirectories)
shopt -s nullglob # Bash only
for file in *.txt; do
echo "Processing: $file"
done
```
### Read file line by line
```bash
# Correct way
while IFS= read -r line; do
echo "$line"
done < file.txt
# Or with process substitution
while IFS= read -r line; do
echo "$line"
done < <(command)
```
### Get file information
```bash
# File size
size=$(stat -f%z "$file") # macOS
size=$(stat -c%s "$file") # Linux
# File modification time
mtime=$(stat -f%m "$file") # macOS
mtime=$(stat -c%Y "$file") # Linux
# Portable basename and dirname
filename="${path##*/}"
dirname="${path%/*}"
# File extension
extension="${filename##*.}"
name="${filename%.*}"
```
## String Operations
### String manipulation
```bash
# Uppercase/lowercase (Bash 4+)
upper="${string^^}"
lower="${string,,}"
# Remove prefix/suffix
no_prefix="${string#prefix}" # Remove shortest match
no_prefix="${string##prefix}" # Remove longest match
no_suffix="${string%suffix}" # Remove shortest match
no_suffix="${string%%suffix}" # Remove longest match
# Replace substring
new_string="${string/old/new}" # Replace first occurrence
new_string="${string//old/new}" # Replace all occurrences
# String length
length="${#string}"
# Substring
substr="${string:start:length}"
```
### Check if string contains substring
```bash
if [[ "$string" == *"substring"* ]]; then
echo "Contains substring"
fi
# Using grep
if echo "$string" | grep -q "pattern"; then
echo "Contains pattern"
fi
```
### String comparison
```bash
# Equality
if [[ "$str1" == "$str2" ]]; then
echo "Strings are equal"
fi
# Pattern matching
if [[ "$string" == pattern* ]]; then
echo "Matches pattern"
fi
# Regular expression
if [[ "$string" =~ ^[0-9]+$ ]]; then
echo "String is numeric"
fi
```
## Array Operations
### Array basics
```bash
# Create array
arr=("item1" "item2" "item3")
# Add to array
arr+=("item4")
# Access elements
echo "${arr[0]}" # First element
echo "${arr[@]}" # All elements
echo "${#arr[@]}" # Array length
# Iterate over array
for item in "${arr[@]}"; do
echo "$item"
done
# Iterate with index
for i in "${!arr[@]}"; do
echo "Index $i: ${arr[$i]}"
done
```
### Check if array contains value
```bash
contains() {
local value=$1
shift
local arr=("$@")
for item in "${arr[@]}"; do
if [[ "$item" == "$value" ]]; then
return 0
fi
done
return 1
}
if contains "target" "${arr[@]}"; then
echo "Array contains target"
fi
```
## Process Management
### Background processes
```bash
# Run in background
command &
pid=$!
# Wait for background process
wait $pid
# Check if process is running
if kill -0 $pid 2>/dev/null; then
echo "Process is running"
fi
```
### Process timeout
```bash
# Using timeout command (GNU coreutils)
timeout 30s command
# Manual implementation
command &
pid=$!
sleep 30
if kill -0 $pid 2>/dev/null; then
kill $pid
echo "Command timed out"
fi
```
### Parallel execution
```bash
# Using xargs
find . -name "*.txt" -print0 | xargs -0 -P 4 -I {} process_file {}
# Using GNU parallel (if available)
parallel process_file ::: file1 file2 file3
# Manual parallel execution
for file in *.txt; do
process_file "$file" &
done
wait # Wait for all background jobs
```
## Input/Output Redirection
### Standard redirections
```bash
# Redirect stdout to file
command > file
# Redirect stderr to file
command 2> file
# Redirect both stdout and stderr
command &> file
command > file 2>&1
# Append instead of overwrite
command >> file
# Redirect stderr to stdout
command 2>&1
# Discard output
command > /dev/null 2>&1
```
### Here documents
```bash
# Basic here document
cat <<EOF
This is a multi-line
string with variable expansion: $var
EOF
# Without variable expansion
cat <<'EOF'
This is a multi-line
string with literal $var
EOF
# Indented here document
cat <<-EOF
This ignores leading tabs
Useful for indenting
EOF
```
## Error Handling
### Check command exit status
```bash
if command; then
echo "Command succeeded"
else
echo "Command failed"
fi
# Or store exit code
command
exit_code=$?
if [[ $exit_code -eq 0 ]]; then
echo "Success"
fi
```
### Robust error handling
```bash
set -euo pipefail
# Custom error handler
error_exit() {
echo "Error: $1" >&2
exit "${2:-1}"
}
# Usage
[[ -f "$file" ]] || error_exit "File not found: $file" 2
```
### Trap signals
```bash
# Cleanup on exit
cleanup() {
rm -f "$TEMP_FILE"
echo "Cleanup complete"
}
trap cleanup EXIT
# Handle specific signals
trap 'echo "Interrupted"; exit 130' INT
trap 'echo "Terminated"; exit 143' TERM
# Error line number
trap 'echo "Error on line $LINENO"' ERR
```
## Text Processing
### Using grep
```bash
# Basic search
grep "pattern" file
# Case-insensitive
grep -i "pattern" file
# Recursive search
grep -r "pattern" directory/
# Show line numbers
grep -n "pattern" file
# Invert match (show non-matching lines)
grep -v "pattern" file
# Extended regex
grep -E "pattern1|pattern2" file
```
### Using sed
```bash
# Replace text
sed 's/old/new/' file # Replace first occurrence per line
sed 's/old/new/g' file # Replace all occurrences
sed 's/old/new/gi' file # Case-insensitive replace
# Delete lines
sed '/pattern/d' file # Delete matching lines
sed '1d' file # Delete first line
sed '$d' file # Delete last line
# Print specific lines
sed -n '5p' file # Print line 5
sed -n '5,10p' file # Print lines 5-10
sed -n '/pattern/p' file # Print matching lines
# In-place editing
sed -i 's/old/new/g' file # Linux
sed -i '' 's/old/new/g' file # macOS
```
### Using awk
```bash
# Print specific columns
awk '{print $1, $3}' file
# Filter by column value
awk '$3 > 100' file
# Sum a column
awk '{sum += $1} END {print sum}' file
# Custom field separator
awk -F: '{print $1}' /etc/passwd
# Pattern matching
awk '/pattern/ {print $1}' file
```
## Date and Time
### Get current date/time
```bash
# Current timestamp
now=$(date +%s)
# Formatted date
date=$(date +"%Y-%m-%d")
datetime=$(date +"%Y-%m-%d %H:%M:%S")
# ISO 8601 format
iso_date=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
```
### Date arithmetic
```bash
# Days ago (GNU date)
yesterday=$(date -d "yesterday" +%Y-%m-%d)
week_ago=$(date -d "7 days ago" +%Y-%m-%d)
# Days ago (BSD date - macOS)
yesterday=$(date -v-1d +%Y-%m-%d)
week_ago=$(date -v-7d +%Y-%m-%d)
```
## Network Operations
### Check if host is reachable
```bash
if ping -c 1 -W 1 example.com &> /dev/null; then
echo "Host is reachable"
fi
```
### Download files
```bash
# Using curl
curl -O https://example.com/file
curl -o output.txt https://example.com/file
# Using wget
wget https://example.com/file
wget -O output.txt https://example.com/file
# Follow redirects and show progress
curl -L --progress-bar -o file https://example.com/file
```
### HTTP requests
```bash
# GET request
curl https://api.example.com/endpoint
# POST request with JSON
curl -X POST https://api.example.com/endpoint \
-H "Content-Type: application/json" \
-d '{"key": "value"}'
# Check HTTP status
status=$(curl -s -o /dev/null -w "%{http_code}" https://example.com)
```
## Temporary Files and Directories
### Create temporary files safely
```bash
# Create temporary file
TEMP_FILE=$(mktemp)
trap 'rm -f "$TEMP_FILE"' EXIT
# Create temporary directory
TEMP_DIR=$(mktemp -d)
trap 'rm -rf "$TEMP_DIR"' EXIT
# Create in specific location
TEMP_FILE=$(mktemp /tmp/myapp.XXXXXX)
```
## Miscellaneous
### Generate random numbers
```bash
# Random number 0-32767
random=$RANDOM
# Random number in range 1-100
random=$((RANDOM % 100 + 1))
# Better random (if available)
random=$(shuf -i 1-100 -n 1)
```
### URL encoding
```bash
urlencode() {
local string="$1"
local strlen=${#string}
local encoded=""
local pos c o
for (( pos=0; pos<strlen; pos++ )); do
c=${string:$pos:1}
case "$c" in
[-_.~a-zA-Z0-9] ) o="$c" ;;
* ) printf -v o '%%%02x' "'$c" ;;
esac
encoded+="$o"
done
echo "$encoded"
}
```
### JSON parsing (with jq)
```bash
# Extract value
value=$(echo '{"key": "value"}' | jq -r '.key')
# Extract from array
items=$(echo '[{"name": "a"}, {"name": "b"}]' | jq -r '.[].name')
# Create JSON
json=$(jq -n --arg name "value" '{key: $name}')
```

View File

@@ -0,0 +1,258 @@
#!/usr/bin/env bash
# Script: template.sh
# Description: Production-ready Bash script template with best practices
# Usage: template.sh [OPTIONS] <command>
# Exit on error, undefined variables, and pipe failures
set -euo pipefail
# Safer word splitting (newline and tab only)
IFS=$'\n\t'
# Constants
readonly SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_NAME="$(basename "${BASH_SOURCE[0]}")"
readonly VERSION="1.0.0"
# Colors for output
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color
# Global variables
VERBOSE=false
DRY_RUN=false
LOG_FILE=""
# Logging functions
log_info() {
echo -e "${BLUE}[INFO]${NC} $*"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $*"
}
log_warning() {
echo -e "${YELLOW}[WARNING]${NC} $*" >&2
}
log_error() {
echo -e "${RED}[ERROR]${NC} $*" >&2
}
log_debug() {
if [[ "$VERBOSE" == true ]]; then
echo -e "${BLUE}[DEBUG]${NC} $*" >&2
fi
}
# Usage information
usage() {
cat <<EOF
Usage: $SCRIPT_NAME [OPTIONS] <command> [args...]
Description of what this script does.
OPTIONS:
-h, --help Show this help message and exit
-v, --verbose Enable verbose output
-d, --dry-run Show what would be done without doing it
-l, --log FILE Write log output to FILE
-V, --version Show version information
COMMANDS:
process <file> Process the specified file
batch <dir> Process all files in directory
clean Clean up temporary files
EXAMPLES:
$SCRIPT_NAME --verbose process input.txt
$SCRIPT_NAME --dry-run batch /path/to/files
$SCRIPT_NAME clean
EOF
}
# Version information
version() {
echo "$SCRIPT_NAME version $VERSION"
}
# Cleanup function (runs on EXIT)
cleanup() {
local exit_code=$?
log_debug "Cleaning up..."
# Remove temporary files
if [[ -n "${TEMP_FILE:-}" ]] && [[ -f "$TEMP_FILE" ]]; then
rm -f "$TEMP_FILE"
fi
# Additional cleanup tasks here
if [[ $exit_code -ne 0 ]]; then
log_error "Script failed with exit code $exit_code"
fi
}
# Error handler (runs on ERR)
error_handler() {
local line_num=$1
log_error "Error occurred in script at line $line_num"
}
# Set up traps
trap cleanup EXIT
trap 'error_handler $LINENO' ERR
# Command implementations
cmd_process() {
local file=$1
if [[ ! -f "$file" ]]; then
log_error "File not found: $file"
return 1
fi
log_info "Processing file: $file"
if [[ "$DRY_RUN" == true ]]; then
log_info "[DRY RUN] Would process: $file"
return 0
fi
# Actual processing logic here
log_debug "Processing contents of $file"
log_success "Successfully processed: $file"
}
cmd_batch() {
local dir=$1
if [[ ! -d "$dir" ]]; then
log_error "Directory not found: $dir"
return 1
fi
log_info "Batch processing directory: $dir"
local count=0
while IFS= read -r -d '' file; do
if cmd_process "$file"; then
            # Note: ((count++)) returns a non-zero status when count is 0, which would trip set -e
            count=$((count + 1))
fi
done < <(find "$dir" -type f -name "*.txt" -print0)
log_success "Processed $count files"
}
cmd_clean() {
log_info "Cleaning up temporary files..."
if [[ "$DRY_RUN" == true ]]; then
log_info "[DRY RUN] Would remove temporary files"
return 0
fi
# Clean up logic here
log_success "Cleanup complete"
}
# Main entry point
main() {
# Check if no arguments provided
if [[ $# -eq 0 ]]; then
usage
exit 1
fi
# Parse command-line options
while [[ $# -gt 0 ]]; do
case $1 in
-h|--help)
usage
exit 0
;;
-V|--version)
version
exit 0
;;
-v|--verbose)
VERBOSE=true
shift
;;
-d|--dry-run)
DRY_RUN=true
log_info "Dry run mode enabled"
shift
;;
-l|--log)
if [[ -z "${2:-}" ]]; then
log_error "Option --log requires an argument"
exit 1
fi
LOG_FILE="$2"
shift 2
;;
-*)
log_error "Unknown option: $1"
usage
exit 1
;;
*)
# First non-option argument is the command
break
;;
esac
done
# Redirect output to log file if specified
if [[ -n "$LOG_FILE" ]]; then
exec 1> >(tee -a "$LOG_FILE")
exec 2> >(tee -a "$LOG_FILE" >&2)
fi
# Get command
local command="${1:-}"
if [[ -z "$command" ]]; then
log_error "No command specified"
usage
exit 1
fi
shift
# Execute command
case "$command" in
process)
if [[ $# -eq 0 ]]; then
log_error "process command requires a file argument"
exit 1
fi
cmd_process "$@"
;;
batch)
if [[ $# -eq 0 ]]; then
log_error "batch command requires a directory argument"
exit 1
fi
cmd_batch "$@"
;;
clean)
cmd_clean
;;
*)
log_error "Unknown command: $command"
usage
exit 1
;;
esac
}
# Run main function
main "$@"