blob: e043eb9d9db3d182ad2ff496d84005c829a87173 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
/*
* Copyright (c) 2013-2014 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _KERN_LLSYNC_I_H
#define _KERN_LLSYNC_I_H
#include <assert.h>
#include <stdalign.h>
#include <kern/cpumap.h>
#include <kern/macros.h>
#include <kern/spinlock.h>
#include <kern/syscnt.h>
#include <kern/work.h>
#include <machine/cpu.h>
/*
* Global data.
*
* The queue number matches the number of global checkpoints that occurred
* since works contained in it were added. After two global checkpoints,
* works are scheduled for processing.
*
* Interrupts must be disabled when acquiring the global data lock.
*/
struct llsync_data {
struct spinlock lock;
struct cpumap registered_cpus;
unsigned int nr_registered_cpus;
struct cpumap pending_checkpoints;
unsigned int nr_pending_checkpoints;
int no_warning;
struct work_queue queue0;
struct work_queue queue1;
struct syscnt sc_global_checkpoints;
struct syscnt sc_periodic_checkins;
struct syscnt sc_failed_periodic_checkins;
/*
* Global checkpoint ID.
*
* This variable can be frequently accessed from many processors so :
* - reserve a whole cache line for it
* - apply optimistic accesses to reduce contention
*/
struct {
alignas(CPU_L1_SIZE) volatile unsigned int value;
} gcid;
};
extern struct llsync_data llsync_data;
/*
* Per-processor data.
*
* Every processor records whether it is registered and a local copy of the
* global checkpoint ID, which is meaningless on unregistered processors.
* The true global checkpoint ID is incremented when a global checkpoint occurs,
* after which all the local copies become stale. Checking in synchronizes
* the local copy of the global checkpoint ID.
*
* When works are deferred, they are initially added to a processor-local
* queue. This queue is regularly flushed to the global data, an operation
* that occurs every time a processor may commit a checkpoint. The downside
* of this scalability optimization is that it introduces some additional
* latency for works that are added to a processor queue between a flush and
* a global checkpoint.
*
* Interrupts and preemption must be disabled on access.
*/
struct llsync_cpu_data {
int registered;
unsigned int gcid;
struct work_queue queue0;
};
extern struct llsync_cpu_data llsync_cpu_data;
static inline struct llsync_cpu_data *
llsync_get_cpu_data(void)
{
return cpu_local_ptr(llsync_cpu_data);
}
static inline void
llsync_checkin(void)
{
struct llsync_cpu_data *cpu_data;
unsigned int gcid;
assert(!cpu_intr_enabled());
assert(!thread_preempt_enabled());
cpu_data = llsync_get_cpu_data();
if (!cpu_data->registered) {
assert(work_queue_nr_works(&cpu_data->queue0) == 0);
return;
}
/*
* The global checkpoint ID obtained might be obsolete here, in which
* case a commit will not determine that a checkpoint actually occurred.
* This should seldom happen.
*/
gcid = llsync_data.gcid.value;
assert((gcid - cpu_data->gcid) <= 1);
cpu_data->gcid = gcid;
}
#endif /* _KERN_LLSYNC_I_H */
|